{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.489853044086774, "eval_steps": 500, "global_step": 700, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "completion_length": 162.42857360839844, "epoch": 0.0006997900629811056, "grad_norm": 2.1621668338775635, "kl": 0.0, "learning_rate": 3.9999951667961485e-07, "loss": -0.0, "reward": 0.2857142984867096, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.2857142984867096, "step": 1 }, { "completion_length": 162.42857360839844, "epoch": 0.0013995801259622112, "grad_norm": 2.071769952774048, "kl": 0.0005087637691758573, "learning_rate": 3.9999806672079545e-07, "loss": 0.0, "reward": 0.1428571492433548, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.1428571492433548, "step": 2 }, { "completion_length": 151.2857208251953, "epoch": 0.002099370188943317, "grad_norm": 1.5806993246078491, "kl": 0.0004339259467087686, "learning_rate": 3.999956501305496e-07, "loss": 0.0, "reward": 0.2142857313156128, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.2142857313156128, "step": 3 }, { "completion_length": 185.35714721679688, "epoch": 0.0027991602519244225, "grad_norm": 3.6436660289764404, "kl": 0.0005974674131721258, "learning_rate": 3.9999226692055735e-07, "loss": 0.0, "reward": 0.4285714626312256, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.4285714626312256, "step": 4 }, { "completion_length": 205.42857360839844, "epoch": 0.0034989503149055285, "grad_norm": 2.561729669570923, "kl": 0.0006411899230442941, "learning_rate": 3.9998791710717035e-07, "loss": 0.0, "reward": 0.2857142984867096, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.2857142984867096, "step": 5 }, { "completion_length": 167.07144165039062, "epoch": 0.004198740377886634, "grad_norm": 2.539142370223999, "kl": 0.0005450592143461108, "learning_rate": 3.9998260071141214e-07, "loss": 0.0, "reward": 0.2857142984867096, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.2857142984867096, "step": 6 }, { "completion_length": 151.71429443359375, "epoch": 0.00489853044086774, "grad_norm": 3.034393548965454, "kl": 0.0005159592255949974, "learning_rate": 3.99976317758978e-07, "loss": 0.0, "reward": 0.3571428656578064, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.3571428656578064, "step": 7 }, { "completion_length": 184.71429443359375, "epoch": 0.005598320503848845, "grad_norm": 1.3017289638519287, "kl": 0.000647695385850966, "learning_rate": 3.999690682802346e-07, "loss": 0.0, "reward": 0.2142857313156128, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.2142857313156128, "step": 8 }, { "completion_length": 180.00001525878906, "epoch": 0.006298110566829951, "grad_norm": 1.3137158155441284, "kl": 0.0006165299564599991, "learning_rate": 3.999608523102203e-07, "loss": 0.0, "reward": 0.2142857313156128, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.2142857313156128, "step": 9 }, { "completion_length": 169.85714721679688, "epoch": 0.006997900629811057, "grad_norm": 2.563966751098633, "kl": 0.0006137760356068611, "learning_rate": 3.999516698886445e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.5, "step": 10 }, { "completion_length": 181.42857360839844, "epoch": 0.007697690692792162, "grad_norm": 2.386747360229492, "kl": 0.0006042409222573042, "learning_rate": 3.9994152105988764e-07, "loss": 0.0, "reward": 0.2857142984867096, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.2857142984867096, "step": 11 }, { "completion_length": 178.42857360839844, "epoch": 0.008397480755773267, "grad_norm": 2.401819944381714, "kl": 0.0007052791188471019, "learning_rate": 3.999304058730011e-07, "loss": 0.0, "reward": 0.3571428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.3571428656578064, "step": 12 }, { "completion_length": 154.92857360839844, "epoch": 0.009097270818754374, "grad_norm": 2.1253886222839355, "kl": 0.000624538806732744, "learning_rate": 3.99918324381707e-07, "loss": 0.0, "reward": 0.1428571492433548, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.1428571492433548, "step": 13 }, { "completion_length": 188.57144165039062, "epoch": 0.00979706088173548, "grad_norm": 2.377917528152466, "kl": 0.0006567585514858365, "learning_rate": 3.9990527664439747e-07, "loss": 0.0, "reward": 0.3571428656578064, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.3571428656578064, "step": 14 }, { "completion_length": 143.57144165039062, "epoch": 0.010496850944716585, "grad_norm": 3.380934476852417, "kl": 0.0009870762005448341, "learning_rate": 3.998912627241349e-07, "loss": 0.0, "reward": 0.2857142984867096, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.2857142984867096, "step": 15 }, { "completion_length": 164.6428680419922, "epoch": 0.01119664100769769, "grad_norm": 2.6711244583129883, "kl": 0.0007087168050929904, "learning_rate": 3.998762826886515e-07, "loss": 0.0, "reward": 0.2142857313156128, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.2142857313156128, "step": 16 }, { "completion_length": 153.85714721679688, "epoch": 0.011896431070678797, "grad_norm": 2.1802475452423096, "kl": 0.001023497898131609, "learning_rate": 3.9986033661034884e-07, "loss": 0.0, "reward": 0.1428571492433548, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.1428571492433548, "step": 17 }, { "completion_length": 184.07144165039062, "epoch": 0.012596221133659902, "grad_norm": 1.7185988426208496, "kl": 0.0007585805142298341, "learning_rate": 3.998434245662975e-07, "loss": 0.0, "reward": 0.3571428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.3571428656578064, "step": 18 }, { "completion_length": 154.7857208251953, "epoch": 0.013296011196641007, "grad_norm": 2.969214677810669, "kl": 0.0007965491386130452, "learning_rate": 3.9982554663823683e-07, "loss": 0.0, "reward": 0.2857142984867096, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.2857142984867096, "step": 19 }, { "completion_length": 176.71429443359375, "epoch": 0.013995801259622114, "grad_norm": 2.431178331375122, "kl": 0.0006497688591480255, "learning_rate": 3.998067029125746e-07, "loss": 0.0, "reward": 0.2142857313156128, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.2142857313156128, "step": 20 }, { "completion_length": 176.85714721679688, "epoch": 0.01469559132260322, "grad_norm": 1.35764741897583, "kl": 0.0006702968385070562, "learning_rate": 3.997868934803863e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "step": 21 }, { "completion_length": 153.7857208251953, "epoch": 0.015395381385584325, "grad_norm": 1.6852751970291138, "kl": 0.0009988198289647698, "learning_rate": 3.9976611843741495e-07, "loss": 0.0, "reward": 0.3571428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.3571428656578064, "step": 22 }, { "completion_length": 156.1428680419922, "epoch": 0.01609517144856543, "grad_norm": 0.9442471265792847, "kl": 0.0010655602673068643, "learning_rate": 3.9974437788407063e-07, "loss": 0.0, "reward": 0.2142857313156128, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.2142857313156128, "step": 23 }, { "completion_length": 175.7857208251953, "epoch": 0.016794961511546535, "grad_norm": 2.5566701889038086, "kl": 0.0009062529425136745, "learning_rate": 3.9972167192542977e-07, "loss": 0.0, "reward": 0.2142857313156128, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.2142857313156128, "step": 24 }, { "completion_length": 193.6428680419922, "epoch": 0.01749475157452764, "grad_norm": 2.0270092487335205, "kl": 0.0006580596673302352, "learning_rate": 3.99698000671235e-07, "loss": 0.0, "reward": 0.4285714626312256, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.4285714626312256, "step": 25 }, { "completion_length": 166.71429443359375, "epoch": 0.01819454163750875, "grad_norm": 2.980055809020996, "kl": 0.0011716507142409682, "learning_rate": 3.9967336423589423e-07, "loss": 0.0, "reward": 0.3571428656578064, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.3571428656578064, "step": 26 }, { "completion_length": 185.1428680419922, "epoch": 0.018894331700489854, "grad_norm": 2.762129068374634, "kl": 0.0011772769503295422, "learning_rate": 3.996477627384804e-07, "loss": 0.0, "reward": 0.2857142984867096, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.2857142984867096, "step": 27 }, { "completion_length": 169.42857360839844, "epoch": 0.01959412176347096, "grad_norm": 3.0474092960357666, "kl": 0.0014008829602971673, "learning_rate": 3.996211963027308e-07, "loss": 0.0, "reward": 0.2857142984867096, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.2857142984867096, "step": 28 }, { "completion_length": 165.71429443359375, "epoch": 0.020293911826452064, "grad_norm": 0.002185442950576544, "kl": 0.0014958097599446774, "learning_rate": 3.9959366505704645e-07, "loss": 0.0, "reward": 0.1428571492433548, "reward_std": 0.0, "rewards/check_gptzero_func": 0.1428571492433548, "step": 29 }, { "completion_length": 152.07144165039062, "epoch": 0.02099370188943317, "grad_norm": 3.3163719177246094, "kl": 0.0014360197819769382, "learning_rate": 3.9956516913449133e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.6428571939468384, "step": 30 }, { "completion_length": 156.57144165039062, "epoch": 0.021693491952414275, "grad_norm": 2.908538579940796, "kl": 0.0014536671806126833, "learning_rate": 3.9953570867279217e-07, "loss": 0.0, "reward": 0.3571428656578064, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.3571428656578064, "step": 31 }, { "completion_length": 150.6428680419922, "epoch": 0.02239328201539538, "grad_norm": 5.655108451843262, "kl": 0.0020901458337903023, "learning_rate": 3.995052838143374e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "step": 32 }, { "completion_length": 141.5, "epoch": 0.02309307207837649, "grad_norm": 3.3134255409240723, "kl": 0.0023269259836524725, "learning_rate": 3.994738947061765e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.5, "step": 33 }, { "completion_length": 162.21429443359375, "epoch": 0.023792862141357594, "grad_norm": 2.6442935466766357, "kl": 0.0024486398324370384, "learning_rate": 3.994415415000195e-07, "loss": 0.0, "reward": 0.2142857313156128, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.2142857313156128, "step": 34 }, { "completion_length": 152.2857208251953, "epoch": 0.0244926522043387, "grad_norm": 2.7008283138275146, "kl": 0.002697949530556798, "learning_rate": 3.994082243522359e-07, "loss": 0.0, "reward": 0.1428571492433548, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.1428571492433548, "step": 35 }, { "completion_length": 148.0, "epoch": 0.025192442267319804, "grad_norm": 3.7234046459198, "kl": 0.003130936762318015, "learning_rate": 3.993739434238544e-07, "loss": 0.0, "reward": 0.4285714626312256, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.4285714626312256, "step": 36 }, { "completion_length": 172.21429443359375, "epoch": 0.02589223233030091, "grad_norm": 1.788806438446045, "kl": 0.0019818381406366825, "learning_rate": 3.993386988805617e-07, "loss": 0.0, "reward": 0.2857142984867096, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.2857142984867096, "step": 37 }, { "completion_length": 158.35714721679688, "epoch": 0.026592022393282014, "grad_norm": 4.708991050720215, "kl": 0.0029942409601062536, "learning_rate": 3.993024908927018e-07, "loss": 0.0, "reward": 0.4285714626312256, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.4285714626312256, "step": 38 }, { "completion_length": 166.92857360839844, "epoch": 0.02729181245626312, "grad_norm": 1.553680419921875, "kl": 0.0026708380319178104, "learning_rate": 3.992653196352753e-07, "loss": 0.0, "reward": 0.3571428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.3571428656578064, "step": 39 }, { "completion_length": 154.42857360839844, "epoch": 0.02799160251924423, "grad_norm": 1.7119766473770142, "kl": 0.005624197889119387, "learning_rate": 3.992271852879386e-07, "loss": 0.0, "reward": 0.3571428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.3571428656578064, "step": 40 }, { "completion_length": 200.21429443359375, "epoch": 0.028691392582225334, "grad_norm": 1.4790523052215576, "kl": 0.0046756877563893795, "learning_rate": 3.991880880350026e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "step": 41 }, { "completion_length": 184.07144165039062, "epoch": 0.02939118264520644, "grad_norm": 1.7897188663482666, "kl": 0.005358730908483267, "learning_rate": 3.991480280654323e-07, "loss": 0.0, "reward": 0.3571428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.3571428656578064, "step": 42 }, { "completion_length": 175.35714721679688, "epoch": 0.030090972708187544, "grad_norm": 1.5548423528671265, "kl": 0.007951375097036362, "learning_rate": 3.9910700557284576e-07, "loss": 0.0, "reward": 0.3571428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.3571428656578064, "step": 43 }, { "completion_length": 155.6428680419922, "epoch": 0.03079076277116865, "grad_norm": 2.725590944290161, "kl": 0.008790958672761917, "learning_rate": 3.990650207555131e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.5, "step": 44 }, { "completion_length": 192.21429443359375, "epoch": 0.031490552834149754, "grad_norm": 2.051744222640991, "kl": 0.0023416366893798113, "learning_rate": 3.990220738163554e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "step": 45 }, { "completion_length": 157.7857208251953, "epoch": 0.03219034289713086, "grad_norm": 2.221405267715454, "kl": 0.007256282493472099, "learning_rate": 3.9897816496294406e-07, "loss": 0.0, "reward": 0.3571428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.3571428656578064, "step": 46 }, { "completion_length": 165.92857360839844, "epoch": 0.032890132960111965, "grad_norm": 2.4208436012268066, "kl": 0.009219921194016933, "learning_rate": 3.989332944074994e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "step": 47 }, { "completion_length": 142.35714721679688, "epoch": 0.03358992302309307, "grad_norm": 2.5692453384399414, "kl": 0.011153223924338818, "learning_rate": 3.988874623668901e-07, "loss": 0.0, "reward": 0.4285714626312256, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.4285714626312256, "step": 48 }, { "completion_length": 172.6428680419922, "epoch": 0.034289713086074175, "grad_norm": 3.8478963375091553, "kl": 0.005089076701551676, "learning_rate": 3.988406690626316e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.5, "step": 49 }, { "completion_length": 153.07144165039062, "epoch": 0.03498950314905528, "grad_norm": 1.5168204307556152, "kl": 0.01330635230988264, "learning_rate": 3.987929147208857e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 50 }, { "completion_length": 154.35714721679688, "epoch": 0.03568929321203639, "grad_norm": 2.3415682315826416, "kl": 0.018696671351790428, "learning_rate": 3.9874419957245866e-07, "loss": 0.0, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 51 }, { "completion_length": 168.92857360839844, "epoch": 0.0363890832750175, "grad_norm": 3.473439931869507, "kl": 0.014826941303908825, "learning_rate": 3.9869452385280085e-07, "loss": 0.0, "reward": 0.2142857313156128, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.2142857313156128, "step": 52 }, { "completion_length": 148.35714721679688, "epoch": 0.0370888733379986, "grad_norm": 1.529994010925293, "kl": 0.0183956827968359, "learning_rate": 3.986438878020051e-07, "loss": 0.0, "reward": 0.4285714626312256, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.4285714626312256, "step": 53 }, { "completion_length": 190.7857208251953, "epoch": 0.03778866340097971, "grad_norm": 0.7264643907546997, "kl": 0.010657703503966331, "learning_rate": 3.9859229166480574e-07, "loss": 0.0, "reward": 0.3571428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.3571428656578064, "step": 54 }, { "completion_length": 146.1428680419922, "epoch": 0.03848845346396081, "grad_norm": 2.617443323135376, "kl": 0.011645260266959667, "learning_rate": 3.985397356905774e-07, "loss": 0.0, "reward": 0.4285714626312256, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.4285714626312256, "step": 55 }, { "completion_length": 152.6428680419922, "epoch": 0.03918824352694192, "grad_norm": 4.20578670501709, "kl": 0.021926531568169594, "learning_rate": 3.984862201333339e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.5714285969734192, "step": 56 }, { "completion_length": 163.1428680419922, "epoch": 0.03988803358992302, "grad_norm": 3.8608505725860596, "kl": 0.015257592312991619, "learning_rate": 3.984317452517268e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.5714285969734192, "step": 57 }, { "completion_length": 135.07144165039062, "epoch": 0.04058782365290413, "grad_norm": 1.4822739362716675, "kl": 0.017864219844341278, "learning_rate": 3.983763113090443e-07, "loss": 0.0, "reward": 0.3571428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.3571428656578064, "step": 58 }, { "completion_length": 152.42857360839844, "epoch": 0.041287613715885234, "grad_norm": 3.3999340534210205, "kl": 0.019019659608602524, "learning_rate": 3.9831991857320996e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.5050762891769409, "rewards/check_gptzero_func": 0.5, "step": 59 }, { "completion_length": 183.35714721679688, "epoch": 0.04198740377886634, "grad_norm": 2.194365978240967, "kl": 0.0204194076359272, "learning_rate": 3.982625673167814e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "step": 60 }, { "completion_length": 146.92857360839844, "epoch": 0.042687193841847444, "grad_norm": 3.065058469772339, "kl": 0.03266499191522598, "learning_rate": 3.982042578169488e-07, "loss": 0.0, "reward": 0.3571428656578064, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.3571428656578064, "step": 61 }, { "completion_length": 171.6428680419922, "epoch": 0.04338698390482855, "grad_norm": 1.8983697891235352, "kl": 0.03760524466633797, "learning_rate": 3.9814499035553407e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "step": 62 }, { "completion_length": 169.21429443359375, "epoch": 0.044086773967809655, "grad_norm": 0.006249432452023029, "kl": 0.012164854444563389, "learning_rate": 3.980847652189887e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "step": 63 }, { "completion_length": 193.92857360839844, "epoch": 0.04478656403079076, "grad_norm": 3.6738457679748535, "kl": 0.01014101505279541, "learning_rate": 3.9802358269839326e-07, "loss": 0.0, "reward": 0.3571428656578064, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.3571428656578064, "step": 64 }, { "completion_length": 168.7857208251953, "epoch": 0.04548635409377187, "grad_norm": 1.8547803163528442, "kl": 0.018301136791706085, "learning_rate": 3.9796144308945525e-07, "loss": 0.0, "reward": 0.3571428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.3571428656578064, "step": 65 }, { "completion_length": 173.07144165039062, "epoch": 0.04618614415675298, "grad_norm": 2.4389915466308594, "kl": 0.014962972141802311, "learning_rate": 3.97898346692508e-07, "loss": 0.0, "reward": 0.2142857313156128, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.2142857313156128, "step": 66 }, { "completion_length": 161.6428680419922, "epoch": 0.04688593421973408, "grad_norm": 2.174207925796509, "kl": 0.021594949066638947, "learning_rate": 3.9783429381250933e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.5, "step": 67 }, { "completion_length": 154.2857208251953, "epoch": 0.04758572428271519, "grad_norm": 3.0958943367004395, "kl": 0.029363546520471573, "learning_rate": 3.9776928475904e-07, "loss": 0.0, "reward": 0.4285714626312256, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.4285714626312256, "step": 68 }, { "completion_length": 174.00001525878906, "epoch": 0.04828551434569629, "grad_norm": 0.006897146813571453, "kl": 0.012384669855237007, "learning_rate": 3.977033198463017e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "step": 69 }, { "completion_length": 158.85714721679688, "epoch": 0.0489853044086774, "grad_norm": 3.0664570331573486, "kl": 0.022730212658643723, "learning_rate": 3.976363993931166e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.5714285969734192, "step": 70 }, { "completion_length": 182.00001525878906, "epoch": 0.0496850944716585, "grad_norm": 4.6477837562561035, "kl": 0.016789477318525314, "learning_rate": 3.975685237229247e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.5050762891769409, "rewards/check_gptzero_func": 0.5, "step": 71 }, { "completion_length": 156.21429443359375, "epoch": 0.05038488453463961, "grad_norm": 1.9391226768493652, "kl": 0.01738656684756279, "learning_rate": 3.974996931637831e-07, "loss": 0.0, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 72 }, { "completion_length": 179.1428680419922, "epoch": 0.05108467459762071, "grad_norm": 2.370103597640991, "kl": 0.020562436431646347, "learning_rate": 3.974299080483638e-07, "loss": 0.0, "reward": 0.4285714626312256, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.4285714626312256, "step": 73 }, { "completion_length": 146.85714721679688, "epoch": 0.05178446466060182, "grad_norm": 3.4054484367370605, "kl": 0.012689548544585705, "learning_rate": 3.9735916871395254e-07, "loss": 0.0, "reward": 0.4285714626312256, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.4285714626312256, "step": 74 }, { "completion_length": 168.7857208251953, "epoch": 0.052484254723582924, "grad_norm": 3.058872699737549, "kl": 0.020301401615142822, "learning_rate": 3.9728747550244695e-07, "loss": 0.0, "reward": 0.4285714626312256, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.4285714626312256, "step": 75 }, { "completion_length": 173.00001525878906, "epoch": 0.05318404478656403, "grad_norm": 2.662966251373291, "kl": 0.013862676918506622, "learning_rate": 3.9721482876035494e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.6428571939468384, "step": 76 }, { "completion_length": 155.7857208251953, "epoch": 0.053883834849545134, "grad_norm": 2.59771728515625, "kl": 0.01696676015853882, "learning_rate": 3.9714122883879304e-07, "loss": 0.0, "reward": 0.4285714626312256, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.4285714626312256, "step": 77 }, { "completion_length": 132.35714721679688, "epoch": 0.05458362491252624, "grad_norm": 3.342616081237793, "kl": 0.02708171308040619, "learning_rate": 3.9706667609348455e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "step": 78 }, { "completion_length": 189.6428680419922, "epoch": 0.055283414975507345, "grad_norm": 2.427316427230835, "kl": 0.013700015842914581, "learning_rate": 3.969911708847582e-07, "loss": 0.0, "reward": 0.4285714626312256, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.4285714626312256, "step": 79 }, { "completion_length": 151.42857360839844, "epoch": 0.05598320503848846, "grad_norm": 2.5900068283081055, "kl": 0.022396380081772804, "learning_rate": 3.9691471357754615e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 80 }, { "completion_length": 143.6428680419922, "epoch": 0.05668299510146956, "grad_norm": 2.744997024536133, "kl": 0.03789540007710457, "learning_rate": 3.968373045413819e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "step": 81 }, { "completion_length": 164.6428680419922, "epoch": 0.05738278516445067, "grad_norm": 3.3995862007141113, "kl": 0.016367558389902115, "learning_rate": 3.967589441503993e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.5, "step": 82 }, { "completion_length": 171.6428680419922, "epoch": 0.05808257522743177, "grad_norm": 1.4792157411575317, "kl": 0.033240318298339844, "learning_rate": 3.9667963278333005e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "step": 83 }, { "completion_length": 191.00001525878906, "epoch": 0.05878236529041288, "grad_norm": 1.0607832670211792, "kl": 0.009467127732932568, "learning_rate": 3.965993708235021e-07, "loss": 0.0, "reward": 0.4285714626312256, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.4285714626312256, "step": 84 }, { "completion_length": 150.5, "epoch": 0.05948215535339398, "grad_norm": 2.3551836013793945, "kl": 0.04632449522614479, "learning_rate": 3.965181586588379e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "step": 85 }, { "completion_length": 162.21429443359375, "epoch": 0.06018194541637509, "grad_norm": 2.594930410385132, "kl": 0.039581455290317535, "learning_rate": 3.964359966818524e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 86 }, { "completion_length": 149.1428680419922, "epoch": 0.06088173547935619, "grad_norm": 5.078446388244629, "kl": 0.04121645539999008, "learning_rate": 3.963528852896512e-07, "loss": 0.0, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 87 }, { "completion_length": 177.42857360839844, "epoch": 0.0615815255423373, "grad_norm": 4.284796237945557, "kl": 0.015902357175946236, "learning_rate": 3.962688248839286e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.5714285969734192, "step": 88 }, { "completion_length": 174.1428680419922, "epoch": 0.0622813156053184, "grad_norm": 2.436661720275879, "kl": 0.022723663598299026, "learning_rate": 3.961838158709656e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 89 }, { "completion_length": 169.92857360839844, "epoch": 0.06298110566829951, "grad_norm": 3.116161823272705, "kl": 0.02194351516664028, "learning_rate": 3.9609785866162825e-07, "loss": 0.0, "reward": 0.4285714626312256, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.4285714626312256, "step": 90 }, { "completion_length": 180.00001525878906, "epoch": 0.06368089573128062, "grad_norm": 1.5481808185577393, "kl": 0.01771431416273117, "learning_rate": 3.96010953671365e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "step": 91 }, { "completion_length": 161.57144165039062, "epoch": 0.06438068579426172, "grad_norm": 2.0321083068847656, "kl": 0.028998972848057747, "learning_rate": 3.959231013202057e-07, "loss": 0.0, "reward": 0.3571428656578064, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.3571428656578064, "step": 92 }, { "completion_length": 171.71429443359375, "epoch": 0.06508047585724283, "grad_norm": 1.2014678716659546, "kl": 0.026206741109490395, "learning_rate": 3.958343020327585e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "step": 93 }, { "completion_length": 191.6428680419922, "epoch": 0.06578026592022393, "grad_norm": 2.0394017696380615, "kl": 0.014384634792804718, "learning_rate": 3.957445562382084e-07, "loss": 0.0, "reward": 0.785714328289032, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "step": 94 }, { "completion_length": 149.7857208251953, "epoch": 0.06648005598320504, "grad_norm": 5.294133186340332, "kl": 0.028008148074150085, "learning_rate": 3.9565386437031525e-07, "loss": 0.0, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 95 }, { "completion_length": 174.6428680419922, "epoch": 0.06717984604618614, "grad_norm": 1.9654920101165771, "kl": 0.02754833921790123, "learning_rate": 3.955622268674113e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 96 }, { "completion_length": 131.35714721679688, "epoch": 0.06787963610916725, "grad_norm": 0.013575117103755474, "kl": 0.05109896883368492, "learning_rate": 3.954696441723992e-07, "loss": 0.0001, "reward": 0.7142857313156128, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "step": 97 }, { "completion_length": 208.42857360839844, "epoch": 0.06857942617214835, "grad_norm": 0.0030638063326478004, "kl": 0.016210131347179413, "learning_rate": 3.9537611673275013e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "step": 98 }, { "completion_length": 183.92857360839844, "epoch": 0.06927921623512946, "grad_norm": 2.51125168800354, "kl": 0.023246267810463905, "learning_rate": 3.9528164500050114e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.5, "step": 99 }, { "completion_length": 180.57144165039062, "epoch": 0.06997900629811056, "grad_norm": 2.352588415145874, "kl": 0.0423022024333477, "learning_rate": 3.951862294322534e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "step": 100 }, { "completion_length": 157.57144165039062, "epoch": 0.07067879636109167, "grad_norm": 1.5895036458969116, "kl": 0.03580068424344063, "learning_rate": 3.9508987048916987e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 101 }, { "completion_length": 168.35714721679688, "epoch": 0.07137858642407278, "grad_norm": 0.005004839040338993, "kl": 0.02044336311519146, "learning_rate": 3.949925686369729e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "step": 102 }, { "completion_length": 188.00001525878906, "epoch": 0.07207837648705388, "grad_norm": 0.6585327982902527, "kl": 0.028955884277820587, "learning_rate": 3.948943243459422e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "step": 103 }, { "completion_length": 149.71429443359375, "epoch": 0.072778166550035, "grad_norm": 2.492542266845703, "kl": 0.01728636771440506, "learning_rate": 3.947951380909125e-07, "loss": 0.0, "reward": 0.3571428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.3571428656578064, "step": 104 }, { "completion_length": 189.00001525878906, "epoch": 0.0734779566130161, "grad_norm": 0.7788301110267639, "kl": 0.017463039606809616, "learning_rate": 3.946950103512711e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "step": 105 }, { "completion_length": 198.50001525878906, "epoch": 0.0741777466759972, "grad_norm": 2.385542154312134, "kl": 0.009109357371926308, "learning_rate": 3.945939416109558e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "step": 106 }, { "completion_length": 168.0, "epoch": 0.0748775367389783, "grad_norm": 0.005704585462808609, "kl": 0.03086216188967228, "learning_rate": 3.944919323584525e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "step": 107 }, { "completion_length": 179.1428680419922, "epoch": 0.07557732680195942, "grad_norm": 1.7662243843078613, "kl": 0.022440915927290916, "learning_rate": 3.943889830867926e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "step": 108 }, { "completion_length": 159.5, "epoch": 0.07627711686494051, "grad_norm": 2.5273375511169434, "kl": 0.03617139905691147, "learning_rate": 3.94285094293551e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "step": 109 }, { "completion_length": 212.21429443359375, "epoch": 0.07697690692792163, "grad_norm": 0.013249583542346954, "kl": 0.021984193474054337, "learning_rate": 3.941802664808434e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "step": 110 }, { "completion_length": 175.85714721679688, "epoch": 0.07767669699090272, "grad_norm": 4.031686782836914, "kl": 0.032846599817276, "learning_rate": 3.94074500155324e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 111 }, { "completion_length": 187.07144165039062, "epoch": 0.07837648705388384, "grad_norm": 0.007363871671259403, "kl": 0.024431224912405014, "learning_rate": 3.9396779582818294e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "step": 112 }, { "completion_length": 198.07144165039062, "epoch": 0.07907627711686493, "grad_norm": 4.20402717590332, "kl": 0.015820473432540894, "learning_rate": 3.9386015401514403e-07, "loss": 0.0, "reward": 0.3571428656578064, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.3571428656578064, "step": 113 }, { "completion_length": 196.21429443359375, "epoch": 0.07977606717984605, "grad_norm": 3.4518990516662598, "kl": 0.04174185171723366, "learning_rate": 3.937515752364621e-07, "loss": 0.0, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 114 }, { "completion_length": 185.6428680419922, "epoch": 0.08047585724282715, "grad_norm": 0.0049303495325148106, "kl": 0.026473047211766243, "learning_rate": 3.936420600169205e-07, "loss": 0.0, "reward": 0.4285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.4285714626312256, "step": 115 }, { "completion_length": 164.2857208251953, "epoch": 0.08117564730580826, "grad_norm": 0.13479961454868317, "kl": 0.020081352442502975, "learning_rate": 3.935316088858287e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "step": 116 }, { "completion_length": 180.7857208251953, "epoch": 0.08187543736878937, "grad_norm": 0.0093807028606534, "kl": 0.03584766387939453, "learning_rate": 3.9342022237701944e-07, "loss": 0.0, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 117 }, { "completion_length": 185.00001525878906, "epoch": 0.08257522743177047, "grad_norm": 0.01810525357723236, "kl": 0.04101763665676117, "learning_rate": 3.933079010288464e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "step": 118 }, { "completion_length": 186.6428680419922, "epoch": 0.08327501749475158, "grad_norm": 0.00445236312225461, "kl": 0.021929247304797173, "learning_rate": 3.931946453841817e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "step": 119 }, { "completion_length": 187.57144165039062, "epoch": 0.08397480755773268, "grad_norm": 2.281690835952759, "kl": 0.017530912533402443, "learning_rate": 3.9308045599041273e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "step": 120 }, { "completion_length": 210.92857360839844, "epoch": 0.08467459762071379, "grad_norm": 0.003424593713134527, "kl": 0.008928514085710049, "learning_rate": 3.9296533339944037e-07, "loss": 0.0, "reward": 0.2857142984867096, "reward_std": 0.0, "rewards/check_gptzero_func": 0.2857142984867096, "step": 121 }, { "completion_length": 184.21429443359375, "epoch": 0.08537438768369489, "grad_norm": 2.1026535034179688, "kl": 0.020458171144127846, "learning_rate": 3.928492781676753e-07, "loss": 0.0, "reward": 0.4285714626312256, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.4285714626312256, "step": 122 }, { "completion_length": 199.07144165039062, "epoch": 0.086074177746676, "grad_norm": 1.0709642171859741, "kl": 0.010702777653932571, "learning_rate": 3.927322908560363e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "step": 123 }, { "completion_length": 170.6428680419922, "epoch": 0.0867739678096571, "grad_norm": 1.365310549736023, "kl": 0.025576891377568245, "learning_rate": 3.926143720299469e-07, "loss": 0.0, "reward": 0.3571428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.3571428656578064, "step": 124 }, { "completion_length": 205.85714721679688, "epoch": 0.08747375787263821, "grad_norm": 2.0331332683563232, "kl": 0.01353074237704277, "learning_rate": 3.9249552225933275e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "step": 125 }, { "completion_length": 193.57144165039062, "epoch": 0.08817354793561931, "grad_norm": 1.3904906511306763, "kl": 0.026216160506010056, "learning_rate": 3.92375742118619e-07, "loss": 0.0, "reward": 0.3571428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.3571428656578064, "step": 126 }, { "completion_length": 141.1428680419922, "epoch": 0.08887333799860042, "grad_norm": 0.0112815722823143, "kl": 0.025340013206005096, "learning_rate": 3.922550321867275e-07, "loss": 0.0, "reward": 0.2857142984867096, "reward_std": 0.0, "rewards/check_gptzero_func": 0.2857142984867096, "step": 127 }, { "completion_length": 141.6428680419922, "epoch": 0.08957312806158152, "grad_norm": 4.012102127075195, "kl": 0.04771756753325462, "learning_rate": 3.9213339304707405e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.7142857313156128, "step": 128 }, { "completion_length": 206.57144165039062, "epoch": 0.09027291812456263, "grad_norm": 2.5189507007598877, "kl": 0.01796441338956356, "learning_rate": 3.920108252875653e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.5, "step": 129 }, { "completion_length": 165.21429443359375, "epoch": 0.09097270818754374, "grad_norm": 2.2849366664886475, "kl": 0.047430619597435, "learning_rate": 3.9188732950059626e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "step": 130 }, { "completion_length": 191.21429443359375, "epoch": 0.09167249825052484, "grad_norm": 1.8176424503326416, "kl": 0.01865430921316147, "learning_rate": 3.9176290628304724e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "step": 131 }, { "completion_length": 184.21429443359375, "epoch": 0.09237228831350595, "grad_norm": 1.6426111459732056, "kl": 0.030585745349526405, "learning_rate": 3.91637556236281e-07, "loss": 0.0, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 132 }, { "completion_length": 175.57144165039062, "epoch": 0.09307207837648705, "grad_norm": 1.48448646068573, "kl": 0.024254297837615013, "learning_rate": 3.9151127996613994e-07, "loss": 0.0, "reward": 0.3571428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.3571428656578064, "step": 133 }, { "completion_length": 204.1428680419922, "epoch": 0.09377186843946816, "grad_norm": 1.7606929540634155, "kl": 0.0255893561989069, "learning_rate": 3.9138407808294287e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "step": 134 }, { "completion_length": 189.57144165039062, "epoch": 0.09447165850244926, "grad_norm": 0.007255424279719591, "kl": 0.020462684333324432, "learning_rate": 3.912559512014826e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "step": 135 }, { "completion_length": 170.6428680419922, "epoch": 0.09517144856543037, "grad_norm": 0.008822077885270119, "kl": 0.02918427065014839, "learning_rate": 3.9112689994102233e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "step": 136 }, { "completion_length": 194.7857208251953, "epoch": 0.09587123862841147, "grad_norm": 1.4406437873840332, "kl": 0.0185464546084404, "learning_rate": 3.9099692492529324e-07, "loss": 0.0, "reward": 0.4285714626312256, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.4285714626312256, "step": 137 }, { "completion_length": 186.92857360839844, "epoch": 0.09657102869139259, "grad_norm": 0.8168829083442688, "kl": 0.022399934008717537, "learning_rate": 3.908660267824909e-07, "loss": 0.0, "reward": 0.3571428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.3571428656578064, "step": 138 }, { "completion_length": 197.50001525878906, "epoch": 0.09727081875437368, "grad_norm": 3.561955690383911, "kl": 0.019386975094676018, "learning_rate": 3.9073420614527284e-07, "loss": 0.0, "reward": 0.4285714626312256, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.4285714626312256, "step": 139 }, { "completion_length": 185.50001525878906, "epoch": 0.0979706088173548, "grad_norm": 3.1612038612365723, "kl": 0.023975731804966927, "learning_rate": 3.9060146365075506e-07, "loss": 0.0, "reward": 0.4285714626312256, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.4285714626312256, "step": 140 }, { "completion_length": 173.21429443359375, "epoch": 0.0986703988803359, "grad_norm": 2.205050468444824, "kl": 0.028390221297740936, "learning_rate": 3.9046779994050905e-07, "loss": 0.0, "reward": 0.4285714626312256, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.4285714626312256, "step": 141 }, { "completion_length": 182.2857208251953, "epoch": 0.099370188943317, "grad_norm": 1.76483154296875, "kl": 0.024142036214470863, "learning_rate": 3.903332156605588e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "step": 142 }, { "completion_length": 202.42857360839844, "epoch": 0.1000699790062981, "grad_norm": 1.3527218103408813, "kl": 0.02578994818031788, "learning_rate": 3.9019771146137757e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "step": 143 }, { "completion_length": 184.50001525878906, "epoch": 0.10076976906927922, "grad_norm": 0.004584628622978926, "kl": 0.012519586831331253, "learning_rate": 3.9006128799788475e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "step": 144 }, { "completion_length": 212.92857360839844, "epoch": 0.10146955913226033, "grad_norm": 0.5000512599945068, "kl": 0.00770693551748991, "learning_rate": 3.899239459294428e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "step": 145 }, { "completion_length": 200.92857360839844, "epoch": 0.10216934919524143, "grad_norm": 0.9776932597160339, "kl": 0.01647227071225643, "learning_rate": 3.897856859198539e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 146 }, { "completion_length": 190.85714721679688, "epoch": 0.10286913925822254, "grad_norm": 0.9597777128219604, "kl": 0.02409287542104721, "learning_rate": 3.896465086373569e-07, "loss": 0.0, "reward": 0.2142857313156128, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.2142857313156128, "step": 147 }, { "completion_length": 232.2857208251953, "epoch": 0.10356892932120364, "grad_norm": 0.0023802323266863823, "kl": 0.00452839769423008, "learning_rate": 3.8950641475462394e-07, "loss": 0.0, "reward": 0.2857142984867096, "reward_std": 0.0, "rewards/check_gptzero_func": 0.2857142984867096, "step": 148 }, { "completion_length": 150.35714721679688, "epoch": 0.10426871938418475, "grad_norm": 0.00779814412817359, "kl": 0.032739609479904175, "learning_rate": 3.893654049487573e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "step": 149 }, { "completion_length": 179.2857208251953, "epoch": 0.10496850944716585, "grad_norm": 2.1413512229919434, "kl": 0.016808386892080307, "learning_rate": 3.8922347990128616e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "step": 150 }, { "completion_length": 173.42857360839844, "epoch": 0.10566829951014696, "grad_norm": 1.4173649549484253, "kl": 0.028368493542075157, "learning_rate": 3.8908064029816315e-07, "loss": 0.0, "reward": 0.2142857313156128, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.2142857313156128, "step": 151 }, { "completion_length": 171.7857208251953, "epoch": 0.10636808957312806, "grad_norm": 0.8318215608596802, "kl": 0.022898903116583824, "learning_rate": 3.889368868297612e-07, "loss": 0.0, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 152 }, { "completion_length": 159.6428680419922, "epoch": 0.10706787963610917, "grad_norm": 0.011626004241406918, "kl": 0.032634247094392776, "learning_rate": 3.887922201908703e-07, "loss": 0.0, "reward": 0.4285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.4285714626312256, "step": 153 }, { "completion_length": 182.85714721679688, "epoch": 0.10776766969909027, "grad_norm": 1.0654501914978027, "kl": 0.016384759917855263, "learning_rate": 3.886466410806935e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "step": 154 }, { "completion_length": 175.50001525878906, "epoch": 0.10846745976207138, "grad_norm": 2.0227701663970947, "kl": 0.017170391976833344, "learning_rate": 3.885001502028445e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "step": 155 }, { "completion_length": 189.50001525878906, "epoch": 0.10916724982505248, "grad_norm": 2.12776255607605, "kl": 0.01205319631844759, "learning_rate": 3.8835274826534363e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "step": 156 }, { "completion_length": 202.2857208251953, "epoch": 0.10986703988803359, "grad_norm": 1.1410002708435059, "kl": 0.015749162063002586, "learning_rate": 3.882044359806143e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "step": 157 }, { "completion_length": 174.07144165039062, "epoch": 0.11056682995101469, "grad_norm": 1.730547308921814, "kl": 0.018030354753136635, "learning_rate": 3.8805521406548025e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "step": 158 }, { "completion_length": 164.6428680419922, "epoch": 0.1112666200139958, "grad_norm": 0.00942758284509182, "kl": 0.026347342878580093, "learning_rate": 3.879050832411613e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "step": 159 }, { "completion_length": 178.21429443359375, "epoch": 0.11196641007697691, "grad_norm": 0.003551044035702944, "kl": 0.012404139153659344, "learning_rate": 3.8775404423327025e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "step": 160 }, { "completion_length": 176.6428680419922, "epoch": 0.11266620013995801, "grad_norm": 0.8654048442840576, "kl": 0.018997181206941605, "learning_rate": 3.876020977718095e-07, "loss": 0.0, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 161 }, { "completion_length": 173.35714721679688, "epoch": 0.11336599020293912, "grad_norm": 1.4962221384048462, "kl": 0.014478221535682678, "learning_rate": 3.874492445911673e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.6428571939468384, "step": 162 }, { "completion_length": 168.07144165039062, "epoch": 0.11406578026592022, "grad_norm": 0.006159429904073477, "kl": 0.01945115439593792, "learning_rate": 3.872954854301142e-07, "loss": 0.0, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 163 }, { "completion_length": 191.92857360839844, "epoch": 0.11476557032890133, "grad_norm": 0.44639861583709717, "kl": 0.00815486814826727, "learning_rate": 3.8714082103179955e-07, "loss": 0.0, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 164 }, { "completion_length": 156.57144165039062, "epoch": 0.11546536039188243, "grad_norm": 1.8552296161651611, "kl": 0.024526473134756088, "learning_rate": 3.8698525214374793e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 165 }, { "completion_length": 167.5, "epoch": 0.11616515045486354, "grad_norm": 1.124375581741333, "kl": 0.02470785565674305, "learning_rate": 3.868287795178555e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "step": 166 }, { "completion_length": 223.57144165039062, "epoch": 0.11686494051784464, "grad_norm": 0.9268680810928345, "kl": 0.012983305379748344, "learning_rate": 3.866714039103864e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "step": 167 }, { "completion_length": 187.50001525878906, "epoch": 0.11756473058082575, "grad_norm": 1.2714390754699707, "kl": 0.026302574202418327, "learning_rate": 3.865131260819689e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 168 }, { "completion_length": 209.50001525878906, "epoch": 0.11826452064380685, "grad_norm": 0.003561601508408785, "kl": 0.01157893892377615, "learning_rate": 3.8635394679759215e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "step": 169 }, { "completion_length": 185.92857360839844, "epoch": 0.11896431070678797, "grad_norm": 0.9095775485038757, "kl": 0.02227042429149151, "learning_rate": 3.8619386682660197e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "step": 170 }, { "completion_length": 160.21429443359375, "epoch": 0.11966410076976906, "grad_norm": 2.749595880508423, "kl": 0.022995274513959885, "learning_rate": 3.8603288694269744e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.6428571939468384, "step": 171 }, { "completion_length": 190.42857360839844, "epoch": 0.12036389083275018, "grad_norm": 0.002630816772580147, "kl": 0.010235711000859737, "learning_rate": 3.858710079239274e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "step": 172 }, { "completion_length": 164.0, "epoch": 0.12106368089573127, "grad_norm": 1.5362565517425537, "kl": 0.025623667985200882, "learning_rate": 3.85708230552686e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "step": 173 }, { "completion_length": 206.6428680419922, "epoch": 0.12176347095871239, "grad_norm": 1.2382439374923706, "kl": 0.012417357414960861, "learning_rate": 3.855445556157093e-07, "loss": 0.0, "reward": 0.2857142984867096, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.2857142984867096, "step": 174 }, { "completion_length": 175.7857208251953, "epoch": 0.1224632610216935, "grad_norm": 0.5511037111282349, "kl": 0.02940467558801174, "learning_rate": 3.853799839040719e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 175 }, { "completion_length": 183.71429443359375, "epoch": 0.1231630510846746, "grad_norm": 0.010460534133017063, "kl": 0.026606591418385506, "learning_rate": 3.8521451621318233e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "step": 176 }, { "completion_length": 190.57144165039062, "epoch": 0.12386284114765571, "grad_norm": 0.005347545258700848, "kl": 0.022543279454112053, "learning_rate": 3.850481533427796e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "step": 177 }, { "completion_length": 214.42857360839844, "epoch": 0.1245626312106368, "grad_norm": 1.7615028619766235, "kl": 0.01479637436568737, "learning_rate": 3.848808960969295e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "step": 178 }, { "completion_length": 174.6428680419922, "epoch": 0.12526242127361792, "grad_norm": 1.4247933626174927, "kl": 0.024751055985689163, "learning_rate": 3.8471274528402027e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.6428571939468384, "step": 179 }, { "completion_length": 206.7857208251953, "epoch": 0.12596221133659902, "grad_norm": 1.9691680669784546, "kl": 0.01430103275924921, "learning_rate": 3.845437017167592e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 180 }, { "completion_length": 203.92857360839844, "epoch": 0.12666200139958012, "grad_norm": 1.9248921871185303, "kl": 0.010852116160094738, "learning_rate": 3.843737662121682e-07, "loss": 0.0, "reward": 0.3571428656578064, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.3571428656578064, "step": 181 }, { "completion_length": 175.92857360839844, "epoch": 0.12736179146256124, "grad_norm": 1.3298709392547607, "kl": 0.02915351092815399, "learning_rate": 3.8420293959158023e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 182 }, { "completion_length": 176.50001525878906, "epoch": 0.12806158152554234, "grad_norm": 0.0045565650798380375, "kl": 0.015465345233678818, "learning_rate": 3.840312226806352e-07, "loss": 0.0, "reward": 0.4285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.4285714626312256, "step": 183 }, { "completion_length": 172.00001525878906, "epoch": 0.12876137158852344, "grad_norm": 0.007714143954217434, "kl": 0.023726962506771088, "learning_rate": 3.83858616309276e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "step": 184 }, { "completion_length": 182.2857208251953, "epoch": 0.12946116165150454, "grad_norm": 1.1713811159133911, "kl": 0.027626153081655502, "learning_rate": 3.8368512131174423e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "step": 185 }, { "completion_length": 190.2857208251953, "epoch": 0.13016095171448566, "grad_norm": 0.005629860796034336, "kl": 0.020982127636671066, "learning_rate": 3.835107385265767e-07, "loss": 0.0, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 186 }, { "completion_length": 186.35714721679688, "epoch": 0.13086074177746676, "grad_norm": 0.008394072763621807, "kl": 0.016412734985351562, "learning_rate": 3.83335468796601e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "step": 187 }, { "completion_length": 198.35714721679688, "epoch": 0.13156053184044786, "grad_norm": 0.6422938108444214, "kl": 0.01465323381125927, "learning_rate": 3.831593129689314e-07, "loss": 0.0, "reward": 0.3571428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.3571428656578064, "step": 188 }, { "completion_length": 199.07144165039062, "epoch": 0.13226032190342898, "grad_norm": 0.7808562517166138, "kl": 0.011609447188675404, "learning_rate": 3.8298227189496494e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "step": 189 }, { "completion_length": 197.21429443359375, "epoch": 0.13296011196641008, "grad_norm": 0.5544365644454956, "kl": 0.00919759925454855, "learning_rate": 3.8280434643037723e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 190 }, { "completion_length": 213.35714721679688, "epoch": 0.13365990202939118, "grad_norm": 1.302751898765564, "kl": 0.017147762700915337, "learning_rate": 3.8262553743511827e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "step": 191 }, { "completion_length": 182.85714721679688, "epoch": 0.13435969209237228, "grad_norm": 1.3094489574432373, "kl": 0.018484842032194138, "learning_rate": 3.824458457734084e-07, "loss": 0.0, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 192 }, { "completion_length": 172.57144165039062, "epoch": 0.1350594821553534, "grad_norm": 0.009919717907905579, "kl": 0.031512077897787094, "learning_rate": 3.8226527231373406e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "step": 193 }, { "completion_length": 168.21429443359375, "epoch": 0.1357592722183345, "grad_norm": 0.006084138061851263, "kl": 0.018543435260653496, "learning_rate": 3.8208381792884364e-07, "loss": 0.0, "reward": 0.4285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.4285714626312256, "step": 194 }, { "completion_length": 152.5, "epoch": 0.1364590622813156, "grad_norm": 1.1117351055145264, "kl": 0.02704041823744774, "learning_rate": 3.819014834957431e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 195 }, { "completion_length": 192.6428680419922, "epoch": 0.1371588523442967, "grad_norm": 0.8523726463317871, "kl": 0.01723356544971466, "learning_rate": 3.8171826989569195e-07, "loss": 0.0, "reward": 0.3571428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.3571428656578064, "step": 196 }, { "completion_length": 190.57144165039062, "epoch": 0.13785864240727783, "grad_norm": 0.35283270478248596, "kl": 0.009949599392712116, "learning_rate": 3.8153417801419894e-07, "loss": 0.0, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 197 }, { "completion_length": 152.1428680419922, "epoch": 0.13855843247025892, "grad_norm": 1.2546861171722412, "kl": 0.0401478074491024, "learning_rate": 3.813492087410175e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 198 }, { "completion_length": 195.07144165039062, "epoch": 0.13925822253324002, "grad_norm": 1.291770339012146, "kl": 0.014195875264704227, "learning_rate": 3.811633629701419e-07, "loss": 0.0, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 199 }, { "completion_length": 162.5, "epoch": 0.13995801259622112, "grad_norm": 2.0312585830688477, "kl": 0.024253856390714645, "learning_rate": 3.809766415998027e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 200 }, { "completion_length": 209.35714721679688, "epoch": 0.14065780265920225, "grad_norm": 0.9857494235038757, "kl": 0.012689170427620411, "learning_rate": 3.807890455324623e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 201 }, { "completion_length": 212.35714721679688, "epoch": 0.14135759272218335, "grad_norm": 0.9701467156410217, "kl": 0.009737645275890827, "learning_rate": 3.8060057567481074e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "step": 202 }, { "completion_length": 172.1428680419922, "epoch": 0.14205738278516444, "grad_norm": 0.007986516691744328, "kl": 0.025673676282167435, "learning_rate": 3.804112329377613e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "step": 203 }, { "completion_length": 144.71429443359375, "epoch": 0.14275717284814557, "grad_norm": 0.013815109618008137, "kl": 0.0411306768655777, "learning_rate": 3.80221018236446e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "step": 204 }, { "completion_length": 209.21429443359375, "epoch": 0.14345696291112667, "grad_norm": 0.9316285848617554, "kl": 0.018284132704138756, "learning_rate": 3.800299324902112e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "step": 205 }, { "completion_length": 190.07144165039062, "epoch": 0.14415675297410777, "grad_norm": 1.9075722694396973, "kl": 0.01683659665286541, "learning_rate": 3.7983797662261327e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "step": 206 }, { "completion_length": 179.6428680419922, "epoch": 0.14485654303708886, "grad_norm": 0.005668825004249811, "kl": 0.016565389931201935, "learning_rate": 3.7964515156141415e-07, "loss": 0.0, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 207 }, { "completion_length": 179.42857360839844, "epoch": 0.14555633310007, "grad_norm": 0.005222757812589407, "kl": 0.017875386402010918, "learning_rate": 3.7945145823857664e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "step": 208 }, { "completion_length": 158.1428680419922, "epoch": 0.1462561231630511, "grad_norm": 0.009748779237270355, "kl": 0.03730550408363342, "learning_rate": 3.7925689759026003e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "step": 209 }, { "completion_length": 180.92857360839844, "epoch": 0.1469559132260322, "grad_norm": 1.2260764837265015, "kl": 0.017031870782375336, "learning_rate": 3.790614705568156e-07, "loss": 0.0, "reward": 0.3571428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.3571428656578064, "step": 210 }, { "completion_length": 192.07144165039062, "epoch": 0.14765570328901328, "grad_norm": 0.00429878244176507, "kl": 0.015482224524021149, "learning_rate": 3.78865178082782e-07, "loss": 0.0, "reward": 0.2857142984867096, "reward_std": 0.0, "rewards/check_gptzero_func": 0.2857142984867096, "step": 211 }, { "completion_length": 207.2857208251953, "epoch": 0.1483554933519944, "grad_norm": 0.6578147411346436, "kl": 0.013750012032687664, "learning_rate": 3.786680211168808e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "step": 212 }, { "completion_length": 181.6428680419922, "epoch": 0.1490552834149755, "grad_norm": 1.0011402368545532, "kl": 0.019525719806551933, "learning_rate": 3.784700006120118e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "step": 213 }, { "completion_length": 212.7857208251953, "epoch": 0.1497550734779566, "grad_norm": 2.0065784454345703, "kl": 0.022760922089219093, "learning_rate": 3.782711175252486e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.5, "step": 214 }, { "completion_length": 169.1428680419922, "epoch": 0.1504548635409377, "grad_norm": 1.3377941846847534, "kl": 0.02202622964978218, "learning_rate": 3.780713728178335e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "step": 215 }, { "completion_length": 188.6428680419922, "epoch": 0.15115465360391883, "grad_norm": 0.2843207120895386, "kl": 0.020674534142017365, "learning_rate": 3.7787076745517344e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 216 }, { "completion_length": 151.35714721679688, "epoch": 0.15185444366689993, "grad_norm": 1.662339210510254, "kl": 0.035333938896656036, "learning_rate": 3.776693024068351e-07, "loss": 0.0, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 217 }, { "completion_length": 217.2857208251953, "epoch": 0.15255423372988103, "grad_norm": 0.005306210834532976, "kl": 0.013570250011980534, "learning_rate": 3.774669786465401e-07, "loss": 0.0, "reward": 0.4285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.4285714626312256, "step": 218 }, { "completion_length": 201.50001525878906, "epoch": 0.15325402379286215, "grad_norm": 0.004157788120210171, "kl": 0.018352538347244263, "learning_rate": 3.772637971521604e-07, "loss": 0.0, "reward": 0.4285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.4285714626312256, "step": 219 }, { "completion_length": 187.07144165039062, "epoch": 0.15395381385584325, "grad_norm": 0.006472014356404543, "kl": 0.019100595265626907, "learning_rate": 3.770597589057136e-07, "loss": 0.0, "reward": 0.4285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.4285714626312256, "step": 220 }, { "completion_length": 172.2857208251953, "epoch": 0.15465360391882435, "grad_norm": 0.5791244506835938, "kl": 0.02023283950984478, "learning_rate": 3.7685486489335803e-07, "loss": 0.0, "reward": 0.2142857313156128, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.2142857313156128, "step": 221 }, { "completion_length": 196.50001525878906, "epoch": 0.15535339398180545, "grad_norm": 0.002360024955123663, "kl": 0.011786316521465778, "learning_rate": 3.766491161053884e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "step": 222 }, { "completion_length": 227.21429443359375, "epoch": 0.15605318404478657, "grad_norm": 0.4446110427379608, "kl": 0.008062051609158516, "learning_rate": 3.764425135362304e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "step": 223 }, { "completion_length": 150.85714721679688, "epoch": 0.15675297410776767, "grad_norm": 1.8167444467544556, "kl": 0.03246838599443436, "learning_rate": 3.7623505818443656e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "step": 224 }, { "completion_length": 216.42857360839844, "epoch": 0.15745276417074877, "grad_norm": 0.9097755551338196, "kl": 0.014242643490433693, "learning_rate": 3.760267510526806e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "step": 225 }, { "completion_length": 168.42857360839844, "epoch": 0.15815255423372987, "grad_norm": 0.3198361098766327, "kl": 0.025004452094435692, "learning_rate": 3.758175931477536e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "step": 226 }, { "completion_length": 178.07144165039062, "epoch": 0.158852344296711, "grad_norm": 0.6277005672454834, "kl": 0.02422436699271202, "learning_rate": 3.7560758548055825e-07, "loss": 0.0, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 227 }, { "completion_length": 169.5, "epoch": 0.1595521343596921, "grad_norm": 1.8747457265853882, "kl": 0.034696850925683975, "learning_rate": 3.753967290661044e-07, "loss": 0.0, "reward": 0.4285714626312256, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.4285714626312256, "step": 228 }, { "completion_length": 163.42857360839844, "epoch": 0.1602519244226732, "grad_norm": 0.008586201816797256, "kl": 0.03114090859889984, "learning_rate": 3.7518502492350405e-07, "loss": 0.0, "reward": 0.4285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.4285714626312256, "step": 229 }, { "completion_length": 144.1428680419922, "epoch": 0.1609517144856543, "grad_norm": 1.1607961654663086, "kl": 0.028630482032895088, "learning_rate": 3.749724740759666e-07, "loss": 0.0, "reward": 0.4285714626312256, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.4285714626312256, "step": 230 }, { "completion_length": 194.42857360839844, "epoch": 0.16165150454863542, "grad_norm": 0.6749139428138733, "kl": 0.00983841996639967, "learning_rate": 3.7475907755079354e-07, "loss": 0.0, "reward": 0.2142857313156128, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.2142857313156128, "step": 231 }, { "completion_length": 185.50001525878906, "epoch": 0.16235129461161651, "grad_norm": 0.005612351931631565, "kl": 0.018870672211050987, "learning_rate": 3.7454483637937376e-07, "loss": 0.0, "reward": 0.4285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.4285714626312256, "step": 232 }, { "completion_length": 178.6428680419922, "epoch": 0.1630510846745976, "grad_norm": 1.8697179555892944, "kl": 0.014896044507622719, "learning_rate": 3.7432975159717854e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "step": 233 }, { "completion_length": 176.85714721679688, "epoch": 0.16375087473757874, "grad_norm": 0.007061387877911329, "kl": 0.020475070923566818, "learning_rate": 3.741138242437565e-07, "loss": 0.0, "reward": 0.4285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.4285714626312256, "step": 234 }, { "completion_length": 206.1428680419922, "epoch": 0.16445066480055984, "grad_norm": 0.003183256834745407, "kl": 0.012574908323585987, "learning_rate": 3.738970553627286e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "step": 235 }, { "completion_length": 204.21429443359375, "epoch": 0.16515045486354094, "grad_norm": 0.0038842950016260147, "kl": 0.012972171418368816, "learning_rate": 3.736794460017829e-07, "loss": 0.0, "reward": 0.4285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.4285714626312256, "step": 236 }, { "completion_length": 180.2857208251953, "epoch": 0.16585024492652203, "grad_norm": 0.591704249382019, "kl": 0.019554350525140762, "learning_rate": 3.7346099721266994e-07, "loss": 0.0, "reward": 0.3571428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.3571428656578064, "step": 237 }, { "completion_length": 174.21429443359375, "epoch": 0.16655003498950316, "grad_norm": 0.0042877099476754665, "kl": 0.02020900696516037, "learning_rate": 3.7324171005119714e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "step": 238 }, { "completion_length": 159.2857208251953, "epoch": 0.16724982505248426, "grad_norm": 0.6230242252349854, "kl": 0.024848150089383125, "learning_rate": 3.7302158557722407e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "step": 239 }, { "completion_length": 196.1428680419922, "epoch": 0.16794961511546536, "grad_norm": 0.44546830654144287, "kl": 0.011597135104238987, "learning_rate": 3.7280062485465724e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 240 }, { "completion_length": 178.2857208251953, "epoch": 0.16864940517844645, "grad_norm": 0.8619314432144165, "kl": 0.019945021718740463, "learning_rate": 3.7257882895144485e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 241 }, { "completion_length": 183.42857360839844, "epoch": 0.16934919524142758, "grad_norm": 0.005130656994879246, "kl": 0.0189889594912529, "learning_rate": 3.7235619893957167e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "step": 242 }, { "completion_length": 213.71429443359375, "epoch": 0.17004898530440868, "grad_norm": 0.4043319523334503, "kl": 0.010713324882090092, "learning_rate": 3.72132735895054e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 243 }, { "completion_length": 177.71429443359375, "epoch": 0.17074877536738978, "grad_norm": 0.007252123672515154, "kl": 0.024714874103665352, "learning_rate": 3.7190844089793423e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "step": 244 }, { "completion_length": 215.07144165039062, "epoch": 0.17144856543037088, "grad_norm": 0.47612982988357544, "kl": 0.009942489676177502, "learning_rate": 3.716833150322758e-07, "loss": 0.0, "reward": 0.3571428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.3571428656578064, "step": 245 }, { "completion_length": 216.00001525878906, "epoch": 0.172148355493352, "grad_norm": 0.00426530372351408, "kl": 0.013690494932234287, "learning_rate": 3.7145735938615795e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "step": 246 }, { "completion_length": 216.1428680419922, "epoch": 0.1728481455563331, "grad_norm": 0.49292147159576416, "kl": 0.01473014522343874, "learning_rate": 3.712305750516703e-07, "loss": 0.0, "reward": 0.3571428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.3571428656578064, "step": 247 }, { "completion_length": 188.71429443359375, "epoch": 0.1735479356193142, "grad_norm": 0.21176354587078094, "kl": 0.018445421010255814, "learning_rate": 3.7100296312490795e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 248 }, { "completion_length": 178.50001525878906, "epoch": 0.17424772568229532, "grad_norm": 0.663266658782959, "kl": 0.013698313385248184, "learning_rate": 3.707745247059655e-07, "loss": 0.0, "reward": 0.3571428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.3571428656578064, "step": 249 }, { "completion_length": 196.71429443359375, "epoch": 0.17494751574527642, "grad_norm": 1.0289642810821533, "kl": 0.0190252847969532, "learning_rate": 3.7054526089893267e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 250 }, { "completion_length": 153.57144165039062, "epoch": 0.17564730580825752, "grad_norm": 0.007452791091054678, "kl": 0.023705553263425827, "learning_rate": 3.7031517281188786e-07, "loss": 0.0, "reward": 0.4285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.4285714626312256, "step": 251 }, { "completion_length": 113.42857360839844, "epoch": 0.17634709587123862, "grad_norm": 1.0395256280899048, "kl": 0.03168682008981705, "learning_rate": 3.70084261556894e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 252 }, { "completion_length": 214.85714721679688, "epoch": 0.17704688593421974, "grad_norm": 0.002410527318716049, "kl": 0.007506976369768381, "learning_rate": 3.6985252824999204e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "step": 253 }, { "completion_length": 198.50001525878906, "epoch": 0.17774667599720084, "grad_norm": 0.005293078254908323, "kl": 0.017621850594878197, "learning_rate": 3.696199740111964e-07, "loss": 0.0, "reward": 0.4285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.4285714626312256, "step": 254 }, { "completion_length": 162.1428680419922, "epoch": 0.17844646606018194, "grad_norm": 0.5356236696243286, "kl": 0.03159737586975098, "learning_rate": 3.693865999644891e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 255 }, { "completion_length": 189.2857208251953, "epoch": 0.17914625612316304, "grad_norm": 0.0037518616300076246, "kl": 0.017401469871401787, "learning_rate": 3.6915240723781444e-07, "loss": 0.0, "reward": 0.2857142984867096, "reward_std": 0.0, "rewards/check_gptzero_func": 0.2857142984867096, "step": 256 }, { "completion_length": 205.42857360839844, "epoch": 0.17984604618614417, "grad_norm": 0.0027896377723664045, "kl": 0.01305939070880413, "learning_rate": 3.689173969630737e-07, "loss": 0.0, "reward": 0.4285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.4285714626312256, "step": 257 }, { "completion_length": 234.00001525878906, "epoch": 0.18054583624912526, "grad_norm": 0.860575258731842, "kl": 0.009122440591454506, "learning_rate": 3.686815702761193e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "step": 258 }, { "completion_length": 243.35714721679688, "epoch": 0.18124562631210636, "grad_norm": 0.0026629262138158083, "kl": 0.0046313246712088585, "learning_rate": 3.684449283167498e-07, "loss": 0.0, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 259 }, { "completion_length": 176.42857360839844, "epoch": 0.1819454163750875, "grad_norm": 0.8813875317573547, "kl": 0.02494039013981819, "learning_rate": 3.6820747222870406e-07, "loss": 0.0, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 260 }, { "completion_length": 182.21429443359375, "epoch": 0.18264520643806859, "grad_norm": 1.1977590322494507, "kl": 0.022919369861483574, "learning_rate": 3.6796920315965565e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "step": 261 }, { "completion_length": 200.1428680419922, "epoch": 0.18334499650104968, "grad_norm": 0.8918925523757935, "kl": 0.017570775002241135, "learning_rate": 3.677301222612077e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "step": 262 }, { "completion_length": 144.6428680419922, "epoch": 0.18404478656403078, "grad_norm": 1.3486170768737793, "kl": 0.041876401752233505, "learning_rate": 3.674902306888867e-07, "loss": 0.0, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 263 }, { "completion_length": 195.2857208251953, "epoch": 0.1847445766270119, "grad_norm": 0.4387526214122772, "kl": 0.014770284295082092, "learning_rate": 3.672495296021378e-07, "loss": 0.0, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 264 }, { "completion_length": 188.6428680419922, "epoch": 0.185444366689993, "grad_norm": 0.008016858249902725, "kl": 0.02409050054848194, "learning_rate": 3.6700802016431827e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "step": 265 }, { "completion_length": 170.2857208251953, "epoch": 0.1861441567529741, "grad_norm": 0.14902782440185547, "kl": 0.04438198730349541, "learning_rate": 3.6676570354269234e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "step": 266 }, { "completion_length": 198.00001525878906, "epoch": 0.1868439468159552, "grad_norm": 0.0023566416930407286, "kl": 0.01239076629281044, "learning_rate": 3.665225809084259e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "step": 267 }, { "completion_length": 199.21429443359375, "epoch": 0.18754373687893633, "grad_norm": 0.7844212651252747, "kl": 0.016460854560136795, "learning_rate": 3.6627865343658e-07, "loss": 0.0, "reward": 0.3571428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.3571428656578064, "step": 268 }, { "completion_length": 215.35714721679688, "epoch": 0.18824352694191743, "grad_norm": 0.0015277478378266096, "kl": 0.011156363412737846, "learning_rate": 3.660339223061059e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "step": 269 }, { "completion_length": 168.85714721679688, "epoch": 0.18894331700489853, "grad_norm": 0.009191691875457764, "kl": 0.02617422305047512, "learning_rate": 3.657883886998391e-07, "loss": 0.0, "reward": 0.4285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.4285714626312256, "step": 270 }, { "completion_length": 206.50001525878906, "epoch": 0.18964310706787962, "grad_norm": 0.002437381772324443, "kl": 0.011914732865989208, "learning_rate": 3.6554205380449343e-07, "loss": 0.0, "reward": 0.2857142984867096, "reward_std": 0.0, "rewards/check_gptzero_func": 0.2857142984867096, "step": 271 }, { "completion_length": 220.21429443359375, "epoch": 0.19034289713086075, "grad_norm": 0.008528918959200382, "kl": 0.02212802693247795, "learning_rate": 3.652949188106558e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 272 }, { "completion_length": 198.00001525878906, "epoch": 0.19104268719384185, "grad_norm": 2.2235240936279297, "kl": 0.0296917911618948, "learning_rate": 3.6504698491277993e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "step": 273 }, { "completion_length": 177.42857360839844, "epoch": 0.19174247725682295, "grad_norm": 0.00606621615588665, "kl": 0.024508515372872353, "learning_rate": 3.6479825330918095e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "step": 274 }, { "completion_length": 210.92857360839844, "epoch": 0.19244226731980407, "grad_norm": 0.4510830342769623, "kl": 0.007396538741886616, "learning_rate": 3.645487252020294e-07, "loss": 0.0, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 275 }, { "completion_length": 194.42857360839844, "epoch": 0.19314205738278517, "grad_norm": 0.5192714929580688, "kl": 0.013078266754746437, "learning_rate": 3.6429840179734533e-07, "loss": 0.0, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 276 }, { "completion_length": 174.35714721679688, "epoch": 0.19384184744576627, "grad_norm": 2.1625211238861084, "kl": 0.03255700692534447, "learning_rate": 3.64047284304993e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "step": 277 }, { "completion_length": 219.4285888671875, "epoch": 0.19454163750874737, "grad_norm": 0.007325149606913328, "kl": 0.009761854074895382, "learning_rate": 3.6379537393867435e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "step": 278 }, { "completion_length": 229.1428680419922, "epoch": 0.1952414275717285, "grad_norm": 0.003577533410862088, "kl": 0.009171624667942524, "learning_rate": 3.6354267191592355e-07, "loss": 0.0, "reward": 0.4285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.4285714626312256, "step": 279 }, { "completion_length": 181.6428680419922, "epoch": 0.1959412176347096, "grad_norm": 0.7648665308952332, "kl": 0.029887784272432327, "learning_rate": 3.6328917945810094e-07, "loss": 0.0, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 280 }, { "completion_length": 185.71429443359375, "epoch": 0.1966410076976907, "grad_norm": 0.005976282991468906, "kl": 0.02214181050658226, "learning_rate": 3.630348977903873e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "step": 281 }, { "completion_length": 168.71429443359375, "epoch": 0.1973407977606718, "grad_norm": 0.8603221774101257, "kl": 0.022559626027941704, "learning_rate": 3.6277982814177773e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 282 }, { "completion_length": 196.92857360839844, "epoch": 0.1980405878236529, "grad_norm": 1.4601398706436157, "kl": 0.01703435182571411, "learning_rate": 3.6252397174507585e-07, "loss": 0.0, "reward": 0.4285714626312256, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.4285714626312256, "step": 283 }, { "completion_length": 175.1428680419922, "epoch": 0.198740377886634, "grad_norm": 1.3160336017608643, "kl": 0.027639390900731087, "learning_rate": 3.622673298368878e-07, "loss": 0.0, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 284 }, { "completion_length": 191.07144165039062, "epoch": 0.1994401679496151, "grad_norm": 1.9431685209274292, "kl": 0.02291315235197544, "learning_rate": 3.620099036576163e-07, "loss": 0.0, "reward": 0.4285714626312256, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.4285714626312256, "step": 285 }, { "completion_length": 177.57144165039062, "epoch": 0.2001399580125962, "grad_norm": 2.985306739807129, "kl": 0.042643751949071884, "learning_rate": 3.6175169445145434e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "step": 286 }, { "completion_length": 155.71429443359375, "epoch": 0.20083974807557733, "grad_norm": 0.002649400383234024, "kl": 0.01661285012960434, "learning_rate": 3.6149270346637984e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "step": 287 }, { "completion_length": 204.21429443359375, "epoch": 0.20153953813855843, "grad_norm": 1.8782721757888794, "kl": 0.01844392530620098, "learning_rate": 3.61232931954149e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "step": 288 }, { "completion_length": 179.00001525878906, "epoch": 0.20223932820153953, "grad_norm": 0.007394877262413502, "kl": 0.02853863313794136, "learning_rate": 3.6097238117029046e-07, "loss": 0.0, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 289 }, { "completion_length": 161.35714721679688, "epoch": 0.20293911826452066, "grad_norm": 1.560531497001648, "kl": 0.04655032977461815, "learning_rate": 3.607110523740992e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "step": 290 }, { "completion_length": 230.50001525878906, "epoch": 0.20363890832750176, "grad_norm": 0.2818041145801544, "kl": 0.024098943918943405, "learning_rate": 3.6044894682863075e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 291 }, { "completion_length": 190.35714721679688, "epoch": 0.20433869839048285, "grad_norm": 0.423895001411438, "kl": 0.023380879312753677, "learning_rate": 3.6018606580069444e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 292 }, { "completion_length": 178.7857208251953, "epoch": 0.20503848845346395, "grad_norm": 0.8743607401847839, "kl": 0.0482012964785099, "learning_rate": 3.5992241056084806e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 293 }, { "completion_length": 199.42857360839844, "epoch": 0.20573827851644508, "grad_norm": 0.8680278062820435, "kl": 0.017911504954099655, "learning_rate": 3.5965798238339095e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "step": 294 }, { "completion_length": 193.50001525878906, "epoch": 0.20643806857942618, "grad_norm": 1.6768147945404053, "kl": 0.02769216150045395, "learning_rate": 3.593927825463585e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "step": 295 }, { "completion_length": 155.21429443359375, "epoch": 0.20713785864240727, "grad_norm": 1.4847735166549683, "kl": 0.02230973169207573, "learning_rate": 3.5912681233151556e-07, "loss": 0.0, "reward": 0.4285714626312256, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.4285714626312256, "step": 296 }, { "completion_length": 198.07144165039062, "epoch": 0.20783764870538837, "grad_norm": 0.9611667990684509, "kl": 0.022595616057515144, "learning_rate": 3.588600730243504e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "step": 297 }, { "completion_length": 188.6428680419922, "epoch": 0.2085374387683695, "grad_norm": 1.6368725299835205, "kl": 0.016185186803340912, "learning_rate": 3.585925659140685e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.5, "step": 298 }, { "completion_length": 164.2857208251953, "epoch": 0.2092372288313506, "grad_norm": 1.5320605039596558, "kl": 0.025026416406035423, "learning_rate": 3.583242922935861e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "step": 299 }, { "completion_length": 184.35714721679688, "epoch": 0.2099370188943317, "grad_norm": 0.00491850171238184, "kl": 0.017030972987413406, "learning_rate": 3.5805525345952454e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "step": 300 }, { "completion_length": 169.42857360839844, "epoch": 0.2106368089573128, "grad_norm": 0.005794129334390163, "kl": 0.033467162400484085, "learning_rate": 3.577854507122032e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "step": 301 }, { "completion_length": 191.2857208251953, "epoch": 0.21133659902029392, "grad_norm": 0.3897000849246979, "kl": 0.02598237618803978, "learning_rate": 3.575148853556337e-07, "loss": 0.0, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 302 }, { "completion_length": 198.50001525878906, "epoch": 0.21203638908327502, "grad_norm": 2.381969690322876, "kl": 0.010137013159692287, "learning_rate": 3.572435586975137e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "step": 303 }, { "completion_length": 159.85714721679688, "epoch": 0.21273617914625612, "grad_norm": 2.3385610580444336, "kl": 0.04521346464753151, "learning_rate": 3.569714720492202e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 304 }, { "completion_length": 171.92857360839844, "epoch": 0.21343596920923724, "grad_norm": 0.009324567392468452, "kl": 0.036783114075660706, "learning_rate": 3.566986267258034e-07, "loss": 0.0, "reward": 0.4285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.4285714626312256, "step": 305 }, { "completion_length": 192.2857208251953, "epoch": 0.21413575927221834, "grad_norm": 0.26683616638183594, "kl": 0.0223538838326931, "learning_rate": 3.5642502404598047e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 306 }, { "completion_length": 187.07144165039062, "epoch": 0.21483554933519944, "grad_norm": 1.4338834285736084, "kl": 0.01840805448591709, "learning_rate": 3.561506653321288e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "step": 307 }, { "completion_length": 221.85714721679688, "epoch": 0.21553533939818054, "grad_norm": 0.0016010769177228212, "kl": 0.006783970165997744, "learning_rate": 3.558755519102801e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "step": 308 }, { "completion_length": 142.1428680419922, "epoch": 0.21623512946116166, "grad_norm": 1.469435453414917, "kl": 0.03523188829421997, "learning_rate": 3.555996851101135e-07, "loss": 0.0, "reward": 0.4285714626312256, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.4285714626312256, "step": 309 }, { "completion_length": 200.85714721679688, "epoch": 0.21693491952414276, "grad_norm": 0.7829067707061768, "kl": 0.01796119287610054, "learning_rate": 3.553230662649496e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "step": 310 }, { "completion_length": 195.35714721679688, "epoch": 0.21763470958712386, "grad_norm": 0.7208725214004517, "kl": 0.018696611747145653, "learning_rate": 3.550456967117436e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "step": 311 }, { "completion_length": 183.7857208251953, "epoch": 0.21833449965010496, "grad_norm": 2.9220948219299316, "kl": 0.02893522009253502, "learning_rate": 3.547675777910791e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 312 }, { "completion_length": 196.35714721679688, "epoch": 0.21903428971308608, "grad_norm": 0.005603363737463951, "kl": 0.023820139467716217, "learning_rate": 3.5448871084716156e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 313 }, { "completion_length": 202.50001525878906, "epoch": 0.21973407977606718, "grad_norm": 1.809675693511963, "kl": 0.018190210685133934, "learning_rate": 3.5420909722781173e-07, "loss": 0.0, "reward": 0.2142857313156128, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.2142857313156128, "step": 314 }, { "completion_length": 139.71429443359375, "epoch": 0.22043386983904828, "grad_norm": 0.005759156309068203, "kl": 0.027088141068816185, "learning_rate": 3.539287382844593e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "step": 315 }, { "completion_length": 162.92857360839844, "epoch": 0.22113365990202938, "grad_norm": 0.677117645740509, "kl": 0.02727174200117588, "learning_rate": 3.536476353721361e-07, "loss": 0.0, "reward": 0.3571428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.3571428656578064, "step": 316 }, { "completion_length": 199.07144165039062, "epoch": 0.2218334499650105, "grad_norm": 0.4234854578971863, "kl": 0.009189656935632229, "learning_rate": 3.533657898494699e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "step": 317 }, { "completion_length": 169.5, "epoch": 0.2225332400279916, "grad_norm": 0.003168420633301139, "kl": 0.01303552184253931, "learning_rate": 3.530832030786775e-07, "loss": 0.0, "reward": 0.4285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.4285714626312256, "step": 318 }, { "completion_length": 167.42857360839844, "epoch": 0.2232330300909727, "grad_norm": 0.005851950030773878, "kl": 0.025543315336108208, "learning_rate": 3.527998764255584e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "step": 319 }, { "completion_length": 182.71429443359375, "epoch": 0.22393282015395383, "grad_norm": 0.9313241839408875, "kl": 0.01326631661504507, "learning_rate": 3.52515811259488e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "step": 320 }, { "completion_length": 151.2857208251953, "epoch": 0.22463261021693492, "grad_norm": 2.09081768989563, "kl": 0.04782923310995102, "learning_rate": 3.522310089534113e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 321 }, { "completion_length": 173.71429443359375, "epoch": 0.22533240027991602, "grad_norm": 1.844640851020813, "kl": 0.03384312242269516, "learning_rate": 3.519454708838358e-07, "loss": 0.0, "reward": 0.4285714626312256, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.4285714626312256, "step": 322 }, { "completion_length": 197.92857360839844, "epoch": 0.22603219034289712, "grad_norm": 0.47143810987472534, "kl": 0.01529007963836193, "learning_rate": 3.5165919843082527e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "step": 323 }, { "completion_length": 184.07144165039062, "epoch": 0.22673198040587825, "grad_norm": 0.007004480808973312, "kl": 0.028769435361027718, "learning_rate": 3.513721929779927e-07, "loss": 0.0, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 324 }, { "completion_length": 198.92857360839844, "epoch": 0.22743177046885935, "grad_norm": 0.8486812710762024, "kl": 0.0238034650683403, "learning_rate": 3.5108445591249415e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "step": 325 }, { "completion_length": 198.92857360839844, "epoch": 0.22813156053184044, "grad_norm": 0.3655204176902771, "kl": 0.009955493733286858, "learning_rate": 3.5079598862502127e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 326 }, { "completion_length": 158.42857360839844, "epoch": 0.22883135059482154, "grad_norm": 0.0073427981697022915, "kl": 0.03223417326807976, "learning_rate": 3.5050679250979545e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "step": 327 }, { "completion_length": 192.85714721679688, "epoch": 0.22953114065780267, "grad_norm": 0.7927706837654114, "kl": 0.01642785035073757, "learning_rate": 3.502168689645604e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "step": 328 }, { "completion_length": 188.2857208251953, "epoch": 0.23023093072078377, "grad_norm": 0.7956569790840149, "kl": 0.020435122773051262, "learning_rate": 3.499262193905757e-07, "loss": 0.0, "reward": 0.2142857313156128, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.2142857313156128, "step": 329 }, { "completion_length": 195.35714721679688, "epoch": 0.23093072078376486, "grad_norm": 0.5955920219421387, "kl": 0.05103569105267525, "learning_rate": 3.4963484519261004e-07, "loss": 0.0001, "reward": 0.3571428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.3571428656578064, "step": 330 }, { "completion_length": 182.35714721679688, "epoch": 0.23163051084674596, "grad_norm": 2.227114677429199, "kl": 0.037436001002788544, "learning_rate": 3.493427477789343e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "step": 331 }, { "completion_length": 169.85714721679688, "epoch": 0.2323303009097271, "grad_norm": 0.7168689370155334, "kl": 0.035353146493434906, "learning_rate": 3.490499285613148e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "step": 332 }, { "completion_length": 190.92857360839844, "epoch": 0.2330300909727082, "grad_norm": 1.7933284044265747, "kl": 0.022699838504195213, "learning_rate": 3.487563889550065e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.6428571939468384, "step": 333 }, { "completion_length": 173.85714721679688, "epoch": 0.23372988103568929, "grad_norm": 0.0028698286041617393, "kl": 0.015531730838119984, "learning_rate": 3.484621303787462e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "step": 334 }, { "completion_length": 180.50001525878906, "epoch": 0.2344296710986704, "grad_norm": 0.010946807451546192, "kl": 0.026166755706071854, "learning_rate": 3.481671542547456e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "step": 335 }, { "completion_length": 224.6428680419922, "epoch": 0.2351294611616515, "grad_norm": 0.9801257848739624, "kl": 0.009198243729770184, "learning_rate": 3.478714620086844e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "step": 336 }, { "completion_length": 189.85714721679688, "epoch": 0.2358292512246326, "grad_norm": 0.7460960149765015, "kl": 0.018040716648101807, "learning_rate": 3.475750550697034e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 337 }, { "completion_length": 178.71429443359375, "epoch": 0.2365290412876137, "grad_norm": 0.7375363707542419, "kl": 0.018047412857413292, "learning_rate": 3.47277934870398e-07, "loss": 0.0, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 338 }, { "completion_length": 194.71429443359375, "epoch": 0.23722883135059483, "grad_norm": 0.0035240540746599436, "kl": 0.013578321784734726, "learning_rate": 3.4698010284681044e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 339 }, { "completion_length": 186.00001525878906, "epoch": 0.23792862141357593, "grad_norm": 0.010145165026187897, "kl": 0.026406429708003998, "learning_rate": 3.466815604384238e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "step": 340 }, { "completion_length": 176.07144165039062, "epoch": 0.23862841147655703, "grad_norm": 2.3864660263061523, "kl": 0.02834818884730339, "learning_rate": 3.463823090881543e-07, "loss": 0.0, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 341 }, { "completion_length": 186.85714721679688, "epoch": 0.23932820153953813, "grad_norm": 0.3408975601196289, "kl": 0.018927130848169327, "learning_rate": 3.4608235024234474e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 342 }, { "completion_length": 218.6428680419922, "epoch": 0.24002799160251925, "grad_norm": 2.155103921890259, "kl": 0.01563032530248165, "learning_rate": 3.457816853507574e-07, "loss": 0.0, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 343 }, { "completion_length": 209.35714721679688, "epoch": 0.24072778166550035, "grad_norm": 0.003645299468189478, "kl": 0.013128400780260563, "learning_rate": 3.454803158665669e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "step": 344 }, { "completion_length": 199.07144165039062, "epoch": 0.24142757172848145, "grad_norm": 0.003979403525590897, "kl": 0.020676320418715477, "learning_rate": 3.4517824324635345e-07, "loss": 0.0, "reward": 0.4285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.4285714626312256, "step": 345 }, { "completion_length": 201.21429443359375, "epoch": 0.24212736179146255, "grad_norm": 0.9270420670509338, "kl": 0.01023801788687706, "learning_rate": 3.448754689500956e-07, "loss": 0.0, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 346 }, { "completion_length": 173.42857360839844, "epoch": 0.24282715185444367, "grad_norm": 0.9614090323448181, "kl": 0.022334929555654526, "learning_rate": 3.445719944411633e-07, "loss": 0.0, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 347 }, { "completion_length": 193.57144165039062, "epoch": 0.24352694191742477, "grad_norm": 0.6329968571662903, "kl": 0.03677517920732498, "learning_rate": 3.4426782118631065e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "step": 348 }, { "completion_length": 188.92857360839844, "epoch": 0.24422673198040587, "grad_norm": 0.0044876690953969955, "kl": 0.02110815793275833, "learning_rate": 3.4396295065566896e-07, "loss": 0.0, "reward": 0.1428571492433548, "reward_std": 0.0, "rewards/check_gptzero_func": 0.1428571492433548, "step": 349 }, { "completion_length": 184.92857360839844, "epoch": 0.244926522043387, "grad_norm": 2.3479273319244385, "kl": 0.02571859024465084, "learning_rate": 3.436573843227397e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.5714285969734192, "step": 350 }, { "completion_length": 172.42857360839844, "epoch": 0.2456263121063681, "grad_norm": 0.15331880748271942, "kl": 0.018859129399061203, "learning_rate": 3.4335112366438724e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "step": 351 }, { "completion_length": 178.42857360839844, "epoch": 0.2463261021693492, "grad_norm": 0.8721615672111511, "kl": 0.027232788503170013, "learning_rate": 3.4304417016083183e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 352 }, { "completion_length": 177.57144165039062, "epoch": 0.2470258922323303, "grad_norm": 0.00629481952637434, "kl": 0.023867381736636162, "learning_rate": 3.427365252956423e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "step": 353 }, { "completion_length": 184.07144165039062, "epoch": 0.24772568229531142, "grad_norm": 0.005834286566823721, "kl": 0.029148750007152557, "learning_rate": 3.424281905557289e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 354 }, { "completion_length": 197.00001525878906, "epoch": 0.24842547235829252, "grad_norm": 0.9186586141586304, "kl": 0.02936585247516632, "learning_rate": 3.4211916743133643e-07, "loss": 0.0, "reward": 0.3571428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.3571428656578064, "step": 355 }, { "completion_length": 181.6428680419922, "epoch": 0.2491252624212736, "grad_norm": 1.5280996561050415, "kl": 0.044441286474466324, "learning_rate": 3.4180945741603654e-07, "loss": 0.0, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 356 }, { "completion_length": 185.57144165039062, "epoch": 0.2498250524842547, "grad_norm": 2.672974109649658, "kl": 0.0459924079477787, "learning_rate": 3.4149906200672086e-07, "loss": 0.0, "reward": 0.3571428656578064, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.3571428656578064, "step": 357 }, { "completion_length": 165.71429443359375, "epoch": 0.25052484254723584, "grad_norm": 0.007046896498650312, "kl": 0.03102685511112213, "learning_rate": 3.411879827035937e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "step": 358 }, { "completion_length": 157.92857360839844, "epoch": 0.25122463261021694, "grad_norm": 0.6856256723403931, "kl": 0.04061925411224365, "learning_rate": 3.4087622101016484e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 359 }, { "completion_length": 172.2857208251953, "epoch": 0.25192442267319803, "grad_norm": 0.003146551316604018, "kl": 0.01322211790829897, "learning_rate": 3.40563778433242e-07, "loss": 0.0, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 360 }, { "completion_length": 181.2857208251953, "epoch": 0.25262421273617913, "grad_norm": 0.7451429963111877, "kl": 0.034680504351854324, "learning_rate": 3.402506564829239e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "step": 361 }, { "completion_length": 174.85714721679688, "epoch": 0.25332400279916023, "grad_norm": 0.007092596963047981, "kl": 0.03785092756152153, "learning_rate": 3.399368566725927e-07, "loss": 0.0, "reward": 0.4285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.4285714626312256, "step": 362 }, { "completion_length": 223.1428680419922, "epoch": 0.2540237928621414, "grad_norm": 0.7141907215118408, "kl": 0.010259164497256279, "learning_rate": 3.396223805189068e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 363 }, { "completion_length": 200.85714721679688, "epoch": 0.2547235829251225, "grad_norm": 0.6981567144393921, "kl": 0.02434498630464077, "learning_rate": 3.393072295417937e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "step": 364 }, { "completion_length": 182.35714721679688, "epoch": 0.2554233729881036, "grad_norm": 0.6945000290870667, "kl": 0.03650469705462456, "learning_rate": 3.389914052644423e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 365 }, { "completion_length": 182.6428680419922, "epoch": 0.2561231630510847, "grad_norm": 2.042914867401123, "kl": 0.02148597687482834, "learning_rate": 3.3867490921329557e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "step": 366 }, { "completion_length": 186.50001525878906, "epoch": 0.2568229531140658, "grad_norm": 2.7780933380126953, "kl": 0.04937548562884331, "learning_rate": 3.3835774291804357e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "step": 367 }, { "completion_length": 165.07144165039062, "epoch": 0.2575227431770469, "grad_norm": 0.41162511706352234, "kl": 0.01816270314157009, "learning_rate": 3.3803990791161567e-07, "loss": 0.0, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 368 }, { "completion_length": 200.1428680419922, "epoch": 0.258222533240028, "grad_norm": 0.9085342288017273, "kl": 0.017531177029013634, "learning_rate": 3.3772140573017316e-07, "loss": 0.0, "reward": 0.4285714626312256, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.4285714626312256, "step": 369 }, { "completion_length": 210.7857208251953, "epoch": 0.25892232330300907, "grad_norm": 1.0211751461029053, "kl": 0.022307362407445908, "learning_rate": 3.3740223791310205e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "step": 370 }, { "completion_length": 220.50001525878906, "epoch": 0.2596221133659902, "grad_norm": 0.00706710759550333, "kl": 0.022423824295401573, "learning_rate": 3.370824060030054e-07, "loss": 0.0, "reward": 0.4285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.4285714626312256, "step": 371 }, { "completion_length": 227.35714721679688, "epoch": 0.2603219034289713, "grad_norm": 0.003542853519320488, "kl": 0.017943989485502243, "learning_rate": 3.3676191154569627e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "step": 372 }, { "completion_length": 160.92857360839844, "epoch": 0.2610216934919524, "grad_norm": 0.6578136086463928, "kl": 0.04885334149003029, "learning_rate": 3.364407560901894e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 373 }, { "completion_length": 177.2857208251953, "epoch": 0.2617214835549335, "grad_norm": 1.1439343690872192, "kl": 0.025096897035837173, "learning_rate": 3.361189411886947e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 374 }, { "completion_length": 197.2857208251953, "epoch": 0.2624212736179146, "grad_norm": 1.142340064048767, "kl": 0.02332978881895542, "learning_rate": 3.3579646839660923e-07, "loss": 0.0, "reward": 0.4285714626312256, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.4285714626312256, "step": 375 }, { "completion_length": 165.57144165039062, "epoch": 0.2631210636808957, "grad_norm": 0.010401121340692043, "kl": 0.04579593241214752, "learning_rate": 3.3547333927250973e-07, "loss": 0.0, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 376 }, { "completion_length": 193.92857360839844, "epoch": 0.2638208537438768, "grad_norm": 0.7678545117378235, "kl": 0.01843220740556717, "learning_rate": 3.3514955537814506e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "step": 377 }, { "completion_length": 168.35714721679688, "epoch": 0.26452064380685797, "grad_norm": 1.4129458665847778, "kl": 0.031178675591945648, "learning_rate": 3.3482511827842885e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 378 }, { "completion_length": 189.85714721679688, "epoch": 0.26522043386983907, "grad_norm": 0.4906970262527466, "kl": 0.011025870218873024, "learning_rate": 3.3450002954143165e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 379 }, { "completion_length": 201.71429443359375, "epoch": 0.26592022393282017, "grad_norm": 0.9279190301895142, "kl": 0.018940366804599762, "learning_rate": 3.341742907383737e-07, "loss": 0.0, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 380 }, { "completion_length": 188.92857360839844, "epoch": 0.26662001399580126, "grad_norm": 0.5956905484199524, "kl": 0.029153874143958092, "learning_rate": 3.33847903443617e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 381 }, { "completion_length": 173.2857208251953, "epoch": 0.26731980405878236, "grad_norm": 1.0010219812393188, "kl": 0.029253369197249413, "learning_rate": 3.335208692346579e-07, "loss": 0.0, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 382 }, { "completion_length": 178.07144165039062, "epoch": 0.26801959412176346, "grad_norm": 2.680180072784424, "kl": 0.028426989912986755, "learning_rate": 3.331931896921193e-07, "loss": 0.0, "reward": 0.4285714626312256, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.4285714626312256, "step": 383 }, { "completion_length": 170.07144165039062, "epoch": 0.26871938418474456, "grad_norm": 0.005205106921494007, "kl": 0.023029359057545662, "learning_rate": 3.3286486639974333e-07, "loss": 0.0, "reward": 0.4285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.4285714626312256, "step": 384 }, { "completion_length": 175.21429443359375, "epoch": 0.26941917424772566, "grad_norm": 1.9119175672531128, "kl": 0.033027730882167816, "learning_rate": 3.325359009443834e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "step": 385 }, { "completion_length": 175.42857360839844, "epoch": 0.2701189643107068, "grad_norm": 0.004134666174650192, "kl": 0.021845312789082527, "learning_rate": 3.3220629491599645e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "step": 386 }, { "completion_length": 181.1428680419922, "epoch": 0.2708187543736879, "grad_norm": 1.2391949892044067, "kl": 0.03317902237176895, "learning_rate": 3.318760499076358e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 387 }, { "completion_length": 208.6428680419922, "epoch": 0.271518544436669, "grad_norm": 0.006398039869964123, "kl": 0.021072857081890106, "learning_rate": 3.3154516751544286e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "step": 388 }, { "completion_length": 229.85714721679688, "epoch": 0.2722183344996501, "grad_norm": 1.2711175680160522, "kl": 0.012140165083110332, "learning_rate": 3.312136493386396e-07, "loss": 0.0, "reward": 0.4285714626312256, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.4285714626312256, "step": 389 }, { "completion_length": 200.6428680419922, "epoch": 0.2729181245626312, "grad_norm": 0.004189657047390938, "kl": 0.016920868307352066, "learning_rate": 3.308814969795211e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "step": 390 }, { "completion_length": 175.85714721679688, "epoch": 0.2736179146256123, "grad_norm": 1.2724921703338623, "kl": 0.02616955153644085, "learning_rate": 3.305487120434472e-07, "loss": 0.0, "reward": 0.3571428656578064, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.3571428656578064, "step": 391 }, { "completion_length": 180.85714721679688, "epoch": 0.2743177046885934, "grad_norm": 1.3679845333099365, "kl": 0.025574391707777977, "learning_rate": 3.3021529613883557e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "step": 392 }, { "completion_length": 179.35714721679688, "epoch": 0.27501749475157455, "grad_norm": 1.2445677518844604, "kl": 0.022411201149225235, "learning_rate": 3.2988125087715304e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 393 }, { "completion_length": 181.71429443359375, "epoch": 0.27571728481455565, "grad_norm": 1.1583846807479858, "kl": 0.025185860693454742, "learning_rate": 3.2954657787290854e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "step": 394 }, { "completion_length": 187.35714721679688, "epoch": 0.27641707487753675, "grad_norm": 1.3583505153656006, "kl": 0.02562299557030201, "learning_rate": 3.2921127874364495e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "step": 395 }, { "completion_length": 176.07144165039062, "epoch": 0.27711686494051785, "grad_norm": 0.5323813557624817, "kl": 0.04567427560687065, "learning_rate": 3.2887535510993133e-07, "loss": 0.0, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 396 }, { "completion_length": 179.21429443359375, "epoch": 0.27781665500349895, "grad_norm": 0.43912947177886963, "kl": 0.02632255293428898, "learning_rate": 3.28538808595355e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 397 }, { "completion_length": 197.42857360839844, "epoch": 0.27851644506648005, "grad_norm": 0.004964158404618502, "kl": 0.025014452636241913, "learning_rate": 3.2820164082651395e-07, "loss": 0.0, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 398 }, { "completion_length": 180.21429443359375, "epoch": 0.27921623512946114, "grad_norm": 1.811867117881775, "kl": 0.017614759504795074, "learning_rate": 3.2786385343300867e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.5050762891769409, "rewards/check_gptzero_func": 0.6428571939468384, "step": 399 }, { "completion_length": 190.50001525878906, "epoch": 0.27991602519244224, "grad_norm": 2.259676456451416, "kl": 0.02197328954935074, "learning_rate": 3.275254480474345e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "step": 400 }, { "completion_length": 180.35714721679688, "epoch": 0.2806158152554234, "grad_norm": 0.006867602933198214, "kl": 0.029920106753706932, "learning_rate": 3.2718642630537365e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 401 }, { "completion_length": 160.57144165039062, "epoch": 0.2813156053184045, "grad_norm": 0.005083282012492418, "kl": 0.02295750565826893, "learning_rate": 3.268467898453874e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "step": 402 }, { "completion_length": 183.42857360839844, "epoch": 0.2820153953813856, "grad_norm": 0.005555762443691492, "kl": 0.02372424677014351, "learning_rate": 3.265065403090079e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 403 }, { "completion_length": 196.07144165039062, "epoch": 0.2827151854443667, "grad_norm": 0.9104824662208557, "kl": 0.021749457344412804, "learning_rate": 3.2616567934073055e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 404 }, { "completion_length": 155.07144165039062, "epoch": 0.2834149755073478, "grad_norm": 1.1475639343261719, "kl": 0.015838012099266052, "learning_rate": 3.2582420858800594e-07, "loss": 0.0, "reward": 0.4285714626312256, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.4285714626312256, "step": 405 }, { "completion_length": 231.35714721679688, "epoch": 0.2841147655703289, "grad_norm": 0.6490912437438965, "kl": 0.012299914844334126, "learning_rate": 3.2548212970123176e-07, "loss": 0.0, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 406 }, { "completion_length": 191.35714721679688, "epoch": 0.28481455563331, "grad_norm": 0.005345775280147791, "kl": 0.020644469186663628, "learning_rate": 3.2513944433374496e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "step": 407 }, { "completion_length": 188.85714721679688, "epoch": 0.28551434569629114, "grad_norm": 0.7396431565284729, "kl": 0.024756096303462982, "learning_rate": 3.2479615414181393e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "step": 408 }, { "completion_length": 159.35714721679688, "epoch": 0.28621413575927224, "grad_norm": 1.3673442602157593, "kl": 0.03201150521636009, "learning_rate": 3.2445226078463003e-07, "loss": 0.0, "reward": 0.3571428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.3571428656578064, "step": 409 }, { "completion_length": 175.6428680419922, "epoch": 0.28691392582225334, "grad_norm": 0.006910817231982946, "kl": 0.03318994492292404, "learning_rate": 3.2410776592429993e-07, "loss": 0.0, "reward": 0.2857142984867096, "reward_std": 0.0, "rewards/check_gptzero_func": 0.2857142984867096, "step": 410 }, { "completion_length": 200.57144165039062, "epoch": 0.28761371588523443, "grad_norm": 0.8189232349395752, "kl": 0.014908754266798496, "learning_rate": 3.2376267122583765e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "step": 411 }, { "completion_length": 199.7857208251953, "epoch": 0.28831350594821553, "grad_norm": 1.0011372566223145, "kl": 0.026383478194475174, "learning_rate": 3.234169783571561e-07, "loss": 0.0, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 412 }, { "completion_length": 197.71429443359375, "epoch": 0.28901329601119663, "grad_norm": 0.6323328614234924, "kl": 0.029050709679722786, "learning_rate": 3.2307068898905946e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 413 }, { "completion_length": 196.2857208251953, "epoch": 0.28971308607417773, "grad_norm": 0.7771051526069641, "kl": 0.020319391041994095, "learning_rate": 3.227238047952348e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "step": 414 }, { "completion_length": 192.07144165039062, "epoch": 0.2904128761371588, "grad_norm": 0.7694050073623657, "kl": 0.013213138096034527, "learning_rate": 3.2237632745224415e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 415 }, { "completion_length": 192.71429443359375, "epoch": 0.29111266620014, "grad_norm": 0.003982989117503166, "kl": 0.01707112416625023, "learning_rate": 3.2202825863951624e-07, "loss": 0.0, "reward": 0.4285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.4285714626312256, "step": 416 }, { "completion_length": 203.92857360839844, "epoch": 0.2918124562631211, "grad_norm": 0.8812685012817383, "kl": 0.019418731331825256, "learning_rate": 3.2167960003933884e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 417 }, { "completion_length": 148.92857360839844, "epoch": 0.2925122463261022, "grad_norm": 0.9575265049934387, "kl": 0.041733238846063614, "learning_rate": 3.2133035333684985e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "step": 418 }, { "completion_length": 191.07144165039062, "epoch": 0.2932120363890833, "grad_norm": 0.6743429899215698, "kl": 0.02127690054476261, "learning_rate": 3.2098052022002976e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 419 }, { "completion_length": 173.1428680419922, "epoch": 0.2939118264520644, "grad_norm": 1.4550578594207764, "kl": 0.039650507271289825, "learning_rate": 3.206301023796934e-07, "loss": 0.0, "reward": 0.3571428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.3571428656578064, "step": 420 }, { "completion_length": 195.71429443359375, "epoch": 0.29461161651504547, "grad_norm": 1.651827096939087, "kl": 0.030250184237957, "learning_rate": 3.2027910150948166e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.6428571939468384, "step": 421 }, { "completion_length": 198.85714721679688, "epoch": 0.29531140657802657, "grad_norm": 0.38959023356437683, "kl": 0.014403433538973331, "learning_rate": 3.1992751930585325e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "step": 422 }, { "completion_length": 159.6428680419922, "epoch": 0.2960111966410077, "grad_norm": 1.345651626586914, "kl": 0.03709445148706436, "learning_rate": 3.195753574680766e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "step": 423 }, { "completion_length": 212.71429443359375, "epoch": 0.2967109867039888, "grad_norm": 1.3157126903533936, "kl": 0.025067942216992378, "learning_rate": 3.192226176982218e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.5, "step": 424 }, { "completion_length": 216.71429443359375, "epoch": 0.2974107767669699, "grad_norm": 0.7132723927497864, "kl": 0.014864970929920673, "learning_rate": 3.188693017011519e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 425 }, { "completion_length": 170.6428680419922, "epoch": 0.298110566829951, "grad_norm": 0.006631612312048674, "kl": 0.03703395277261734, "learning_rate": 3.1851541118451517e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "step": 426 }, { "completion_length": 178.42857360839844, "epoch": 0.2988103568929321, "grad_norm": 1.1006743907928467, "kl": 0.05553247407078743, "learning_rate": 3.181609478587367e-07, "loss": 0.0001, "reward": 0.785714328289032, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "step": 427 }, { "completion_length": 183.00001525878906, "epoch": 0.2995101469559132, "grad_norm": 0.006483997218310833, "kl": 0.025525551289319992, "learning_rate": 3.1780591343700993e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "step": 428 }, { "completion_length": 188.1428680419922, "epoch": 0.3002099370188943, "grad_norm": 3.3809287548065186, "kl": 0.0215449295938015, "learning_rate": 3.1745030963528867e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "step": 429 }, { "completion_length": 217.71429443359375, "epoch": 0.3009097270818754, "grad_norm": 1.399482250213623, "kl": 0.015575726516544819, "learning_rate": 3.1709413817227847e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "step": 430 }, { "completion_length": 185.42857360839844, "epoch": 0.30160951714485656, "grad_norm": 0.8324539661407471, "kl": 0.01825200393795967, "learning_rate": 3.1673740076942875e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "step": 431 }, { "completion_length": 189.35714721679688, "epoch": 0.30230930720783766, "grad_norm": 0.004090950824320316, "kl": 0.01635858602821827, "learning_rate": 3.1638009915092393e-07, "loss": 0.0, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 432 }, { "completion_length": 205.7857208251953, "epoch": 0.30300909727081876, "grad_norm": 1.0944817066192627, "kl": 0.026154454797506332, "learning_rate": 3.160222350436757e-07, "loss": 0.0, "reward": 0.4285714626312256, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.4285714626312256, "step": 433 }, { "completion_length": 191.85714721679688, "epoch": 0.30370888733379986, "grad_norm": 0.6462128162384033, "kl": 0.018102161586284637, "learning_rate": 3.156638101773143e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 434 }, { "completion_length": 215.35714721679688, "epoch": 0.30440867739678096, "grad_norm": 0.004543273244053125, "kl": 0.02344910241663456, "learning_rate": 3.1530482628418e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "step": 435 }, { "completion_length": 211.50001525878906, "epoch": 0.30510846745976206, "grad_norm": 0.007342597935348749, "kl": 0.023821823298931122, "learning_rate": 3.149452850993152e-07, "loss": 0.0, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 436 }, { "completion_length": 195.50001525878906, "epoch": 0.30580825752274315, "grad_norm": 1.1299775838851929, "kl": 0.02076021395623684, "learning_rate": 3.145851883604558e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.5, "step": 437 }, { "completion_length": 186.2857208251953, "epoch": 0.3065080475857243, "grad_norm": 0.8335484862327576, "kl": 0.031062429770827293, "learning_rate": 3.1422453780802264e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "step": 438 }, { "completion_length": 195.50001525878906, "epoch": 0.3072078376487054, "grad_norm": 0.007976455613970757, "kl": 0.02391653135418892, "learning_rate": 3.1386333518511345e-07, "loss": 0.0, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 439 }, { "completion_length": 200.21429443359375, "epoch": 0.3079076277116865, "grad_norm": 0.7803175449371338, "kl": 0.020620595663785934, "learning_rate": 3.1350158223749413e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 440 }, { "completion_length": 195.6428680419922, "epoch": 0.3086074177746676, "grad_norm": 0.004706548992544413, "kl": 0.020712165161967278, "learning_rate": 3.1313928071359036e-07, "loss": 0.0, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 441 }, { "completion_length": 196.85714721679688, "epoch": 0.3093072078376487, "grad_norm": 0.8195966482162476, "kl": 0.032697319984436035, "learning_rate": 3.1277643236447934e-07, "loss": 0.0, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 442 }, { "completion_length": 171.00001525878906, "epoch": 0.3100069979006298, "grad_norm": 1.1854536533355713, "kl": 0.038218557834625244, "learning_rate": 3.124130389438811e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 443 }, { "completion_length": 164.85714721679688, "epoch": 0.3107067879636109, "grad_norm": 0.8803959488868713, "kl": 0.036526720970869064, "learning_rate": 3.1204910220815005e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 444 }, { "completion_length": 207.07144165039062, "epoch": 0.311406578026592, "grad_norm": 0.8009771704673767, "kl": 0.020504629239439964, "learning_rate": 3.116846239162666e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "step": 445 }, { "completion_length": 199.92857360839844, "epoch": 0.31210636808957315, "grad_norm": 1.1930276155471802, "kl": 0.02396634966135025, "learning_rate": 3.1131960582982876e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "step": 446 }, { "completion_length": 201.92857360839844, "epoch": 0.31280615815255425, "grad_norm": 0.007053898181766272, "kl": 0.02715187333524227, "learning_rate": 3.109540497130433e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "step": 447 }, { "completion_length": 144.2857208251953, "epoch": 0.31350594821553535, "grad_norm": 1.8172719478607178, "kl": 0.034259747713804245, "learning_rate": 3.105879573327174e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.6428571939468384, "step": 448 }, { "completion_length": 198.57144165039062, "epoch": 0.31420573827851644, "grad_norm": 0.8178118467330933, "kl": 0.028386041522026062, "learning_rate": 3.102213304582502e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 449 }, { "completion_length": 198.1428680419922, "epoch": 0.31490552834149754, "grad_norm": 1.054382085800171, "kl": 0.017203137278556824, "learning_rate": 3.0985417086162417e-07, "loss": 0.0, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 450 }, { "completion_length": 204.92857360839844, "epoch": 0.31560531840447864, "grad_norm": 0.8276398777961731, "kl": 0.02506078965961933, "learning_rate": 3.094864803173964e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "step": 451 }, { "completion_length": 181.35714721679688, "epoch": 0.31630510846745974, "grad_norm": 1.2800688743591309, "kl": 0.03069334104657173, "learning_rate": 3.091182606026903e-07, "loss": 0.0, "reward": 0.4285714626312256, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.4285714626312256, "step": 452 }, { "completion_length": 183.2857208251953, "epoch": 0.3170048985304409, "grad_norm": 0.8573112487792969, "kl": 0.0253884457051754, "learning_rate": 3.087495134971867e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "step": 453 }, { "completion_length": 198.85714721679688, "epoch": 0.317704688593422, "grad_norm": 0.004384016152471304, "kl": 0.01863146387040615, "learning_rate": 3.0838024078311577e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "step": 454 }, { "completion_length": 216.00001525878906, "epoch": 0.3184044786564031, "grad_norm": 0.4845581352710724, "kl": 0.01092128548771143, "learning_rate": 3.080104442452476e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 455 }, { "completion_length": 204.00001525878906, "epoch": 0.3191042687193842, "grad_norm": 0.0038679074496030807, "kl": 0.017811274155974388, "learning_rate": 3.076401256708843e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 456 }, { "completion_length": 182.07144165039062, "epoch": 0.3198040587823653, "grad_norm": 2.2464168071746826, "kl": 0.04614328593015671, "learning_rate": 3.0726928684985105e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "step": 457 }, { "completion_length": 181.50001525878906, "epoch": 0.3205038488453464, "grad_norm": 1.5309993028640747, "kl": 0.022071899846196175, "learning_rate": 3.0689792957448753e-07, "loss": 0.0, "reward": 0.3571428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.3571428656578064, "step": 458 }, { "completion_length": 159.57144165039062, "epoch": 0.3212036389083275, "grad_norm": 0.6382618546485901, "kl": 0.04648633673787117, "learning_rate": 3.0652605563963896e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 459 }, { "completion_length": 165.85714721679688, "epoch": 0.3219034289713086, "grad_norm": 1.0912450551986694, "kl": 0.026466449722647667, "learning_rate": 3.061536668426481e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "step": 460 }, { "completion_length": 196.1428680419922, "epoch": 0.32260321903428973, "grad_norm": 0.005231840070337057, "kl": 0.026346756145358086, "learning_rate": 3.057807649833457e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "step": 461 }, { "completion_length": 221.9285888671875, "epoch": 0.32330300909727083, "grad_norm": 1.4150323867797852, "kl": 0.01162702776491642, "learning_rate": 3.054073518640427e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "step": 462 }, { "completion_length": 183.35714721679688, "epoch": 0.32400279916025193, "grad_norm": 3.7010645866394043, "kl": 0.03048822656273842, "learning_rate": 3.050334292895207e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "step": 463 }, { "completion_length": 196.35714721679688, "epoch": 0.32470258922323303, "grad_norm": 0.765592634677887, "kl": 0.019732153043150902, "learning_rate": 3.0465899906702365e-07, "loss": 0.0, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 464 }, { "completion_length": 197.50001525878906, "epoch": 0.3254023792862141, "grad_norm": 0.9544207453727722, "kl": 0.023729894310235977, "learning_rate": 3.042840630062493e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 465 }, { "completion_length": 148.2857208251953, "epoch": 0.3261021693491952, "grad_norm": 2.0614309310913086, "kl": 0.0456995889544487, "learning_rate": 3.039086229193399e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "step": 466 }, { "completion_length": 193.1428680419922, "epoch": 0.3268019594121763, "grad_norm": 0.004235828295350075, "kl": 0.02045871689915657, "learning_rate": 3.035326806208741e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "step": 467 }, { "completion_length": 209.42857360839844, "epoch": 0.3275017494751575, "grad_norm": 0.8472065329551697, "kl": 0.012107213959097862, "learning_rate": 3.031562379278575e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 468 }, { "completion_length": 208.00001525878906, "epoch": 0.3282015395381386, "grad_norm": 0.9307161569595337, "kl": 0.03212505951523781, "learning_rate": 3.0277929665971447e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "step": 469 }, { "completion_length": 184.1428680419922, "epoch": 0.3289013296011197, "grad_norm": 0.00613441364839673, "kl": 0.029952503740787506, "learning_rate": 3.02401858638279e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "step": 470 }, { "completion_length": 145.07144165039062, "epoch": 0.3296011196641008, "grad_norm": 1.3014689683914185, "kl": 0.05368249490857124, "learning_rate": 3.0202392568778593e-07, "loss": 0.0001, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 471 }, { "completion_length": 205.2857208251953, "epoch": 0.33030090972708187, "grad_norm": 1.50192129611969, "kl": 0.02523050829768181, "learning_rate": 3.0164549963486235e-07, "loss": 0.0, "reward": 0.4285714626312256, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.4285714626312256, "step": 472 }, { "completion_length": 180.50001525878906, "epoch": 0.33100069979006297, "grad_norm": 0.8972246646881104, "kl": 0.05419189855456352, "learning_rate": 3.0126658230851845e-07, "loss": 0.0001, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 473 }, { "completion_length": 184.85714721679688, "epoch": 0.33170048985304407, "grad_norm": 1.803112268447876, "kl": 0.02427106536924839, "learning_rate": 3.0088717554013884e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.5, "step": 474 }, { "completion_length": 197.7857208251953, "epoch": 0.33240027991602517, "grad_norm": 1.4649418592453003, "kl": 0.04181037098169327, "learning_rate": 3.00507281163474e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "step": 475 }, { "completion_length": 190.00001525878906, "epoch": 0.3331000699790063, "grad_norm": 1.0860769748687744, "kl": 0.033349357545375824, "learning_rate": 3.001269010146306e-07, "loss": 0.0, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 476 }, { "completion_length": 171.1428680419922, "epoch": 0.3337998600419874, "grad_norm": 1.4760380983352661, "kl": 0.042727965861558914, "learning_rate": 2.9974603693206366e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "step": 477 }, { "completion_length": 188.2857208251953, "epoch": 0.3344996501049685, "grad_norm": 0.5807764530181885, "kl": 0.024857990443706512, "learning_rate": 2.9936469075656683e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 478 }, { "completion_length": 214.1428680419922, "epoch": 0.3351994401679496, "grad_norm": 1.7858057022094727, "kl": 0.021253671497106552, "learning_rate": 2.989828643312639e-07, "loss": 0.0, "reward": 0.785714328289032, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "step": 479 }, { "completion_length": 211.6428680419922, "epoch": 0.3358992302309307, "grad_norm": 1.2214895486831665, "kl": 0.02429124526679516, "learning_rate": 2.9860055950159994e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "step": 480 }, { "completion_length": 184.35714721679688, "epoch": 0.3365990202939118, "grad_norm": 0.008441203273832798, "kl": 0.04451807960867882, "learning_rate": 2.98217778115332e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "step": 481 }, { "completion_length": 132.5, "epoch": 0.3372988103568929, "grad_norm": 1.2302217483520508, "kl": 0.038474034518003464, "learning_rate": 2.9783452202252065e-07, "loss": 0.0, "reward": 0.4285714626312256, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.4285714626312256, "step": 482 }, { "completion_length": 195.92857360839844, "epoch": 0.33799860041987406, "grad_norm": 1.357618808746338, "kl": 0.022870337590575218, "learning_rate": 2.974507930755206e-07, "loss": 0.0, "reward": 0.3571428656578064, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.3571428656578064, "step": 483 }, { "completion_length": 168.07144165039062, "epoch": 0.33869839048285516, "grad_norm": 0.007778555620461702, "kl": 0.04222830384969711, "learning_rate": 2.970665931289722e-07, "loss": 0.0, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 484 }, { "completion_length": 199.21429443359375, "epoch": 0.33939818054583626, "grad_norm": 1.7007582187652588, "kl": 0.0240781269967556, "learning_rate": 2.9668192403979194e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "step": 485 }, { "completion_length": 176.57144165039062, "epoch": 0.34009797060881736, "grad_norm": 0.004922912456095219, "kl": 0.03378324955701828, "learning_rate": 2.9629678766716414e-07, "loss": 0.0, "reward": 0.2857142984867096, "reward_std": 0.0, "rewards/check_gptzero_func": 0.2857142984867096, "step": 486 }, { "completion_length": 181.6428680419922, "epoch": 0.34079776067179846, "grad_norm": 0.008107264526188374, "kl": 0.02736920677125454, "learning_rate": 2.9591118587253125e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "step": 487 }, { "completion_length": 185.2857208251953, "epoch": 0.34149755073477955, "grad_norm": 1.4481292963027954, "kl": 0.027283810079097748, "learning_rate": 2.9552512051958545e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.7142857313156128, "step": 488 }, { "completion_length": 162.21429443359375, "epoch": 0.34219734079776065, "grad_norm": 2.112471103668213, "kl": 0.04522133991122246, "learning_rate": 2.951385934742592e-07, "loss": 0.0, "reward": 0.4285714626312256, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.4285714626312256, "step": 489 }, { "completion_length": 197.71429443359375, "epoch": 0.34289713086074175, "grad_norm": 1.134124755859375, "kl": 0.028563635423779488, "learning_rate": 2.947516066047166e-07, "loss": 0.0, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 490 }, { "completion_length": 180.21429443359375, "epoch": 0.3435969209237229, "grad_norm": 0.8103861808776855, "kl": 0.02636152133345604, "learning_rate": 2.94364161781344e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 491 }, { "completion_length": 153.1428680419922, "epoch": 0.344296710986704, "grad_norm": 1.6501652002334595, "kl": 0.03942684456706047, "learning_rate": 2.939762608767413e-07, "loss": 0.0, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 492 }, { "completion_length": 146.2857208251953, "epoch": 0.3449965010496851, "grad_norm": 1.1731581687927246, "kl": 0.04676751047372818, "learning_rate": 2.9358790576571254e-07, "loss": 0.0, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 493 }, { "completion_length": 184.1428680419922, "epoch": 0.3456962911126662, "grad_norm": 0.8028280138969421, "kl": 0.028286494314670563, "learning_rate": 2.9319909832525717e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 494 }, { "completion_length": 130.85714721679688, "epoch": 0.3463960811756473, "grad_norm": 0.007896073162555695, "kl": 0.04474465176463127, "learning_rate": 2.9280984043456087e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "step": 495 }, { "completion_length": 235.07144165039062, "epoch": 0.3470958712386284, "grad_norm": 1.6852149963378906, "kl": 0.01534675806760788, "learning_rate": 2.9242013397498635e-07, "loss": 0.0, "reward": 0.3571428656578064, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.3571428656578064, "step": 496 }, { "completion_length": 181.92857360839844, "epoch": 0.3477956613016095, "grad_norm": 2.4247560501098633, "kl": 0.034807994961738586, "learning_rate": 2.920299808300643e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "step": 497 }, { "completion_length": 201.2857208251953, "epoch": 0.34849545136459065, "grad_norm": 2.073784112930298, "kl": 0.03788765147328377, "learning_rate": 2.9163938288548445e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "step": 498 }, { "completion_length": 188.1428680419922, "epoch": 0.34919524142757175, "grad_norm": 2.0151796340942383, "kl": 0.039097972214221954, "learning_rate": 2.912483420290863e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "step": 499 }, { "completion_length": 183.50001525878906, "epoch": 0.34989503149055284, "grad_norm": 1.8998000621795654, "kl": 0.038978077471256256, "learning_rate": 2.9085686015085005e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "step": 500 }, { "completion_length": 156.21429443359375, "epoch": 0.35059482155353394, "grad_norm": 2.9568324089050293, "kl": 0.05164632946252823, "learning_rate": 2.904649391428874e-07, "loss": 0.0001, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 501 }, { "completion_length": 180.85714721679688, "epoch": 0.35129461161651504, "grad_norm": 3.0119810104370117, "kl": 0.034306906163692474, "learning_rate": 2.9007258089943246e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.6428571939468384, "step": 502 }, { "completion_length": 218.00001525878906, "epoch": 0.35199440167949614, "grad_norm": 0.7019025087356567, "kl": 0.03303629904985428, "learning_rate": 2.896797873168326e-07, "loss": 0.0, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 503 }, { "completion_length": 213.6428680419922, "epoch": 0.35269419174247724, "grad_norm": 1.0243908166885376, "kl": 0.029559114947915077, "learning_rate": 2.892865602935393e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "step": 504 }, { "completion_length": 168.92857360839844, "epoch": 0.35339398180545833, "grad_norm": 2.033384323120117, "kl": 0.05405454337596893, "learning_rate": 2.8889290173009897e-07, "loss": 0.0001, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 505 }, { "completion_length": 171.6428680419922, "epoch": 0.3540937718684395, "grad_norm": 0.8686595559120178, "kl": 0.046510692685842514, "learning_rate": 2.884988135291435e-07, "loss": 0.0, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 506 }, { "completion_length": 170.57144165039062, "epoch": 0.3547935619314206, "grad_norm": 1.2175019979476929, "kl": 0.05344880744814873, "learning_rate": 2.881042975953817e-07, "loss": 0.0001, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 507 }, { "completion_length": 194.6428680419922, "epoch": 0.3554933519944017, "grad_norm": 1.1815731525421143, "kl": 0.03906374052166939, "learning_rate": 2.8770935583558944e-07, "loss": 0.0, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 508 }, { "completion_length": 173.35714721679688, "epoch": 0.3561931420573828, "grad_norm": 0.009414693340659142, "kl": 0.038596801459789276, "learning_rate": 2.8731399015860074e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "step": 509 }, { "completion_length": 158.07144165039062, "epoch": 0.3568929321203639, "grad_norm": 0.007976559922099113, "kl": 0.05238325893878937, "learning_rate": 2.869182024752986e-07, "loss": 0.0001, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 510 }, { "completion_length": 190.35714721679688, "epoch": 0.357592722183345, "grad_norm": 1.144026279449463, "kl": 0.03528675436973572, "learning_rate": 2.865219946986054e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 511 }, { "completion_length": 226.00001525878906, "epoch": 0.3582925122463261, "grad_norm": 1.22597336769104, "kl": 0.015853432938456535, "learning_rate": 2.8612536874347424e-07, "loss": 0.0, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 512 }, { "completion_length": 206.00001525878906, "epoch": 0.35899230230930723, "grad_norm": 0.005469062831252813, "kl": 0.020527929067611694, "learning_rate": 2.8572832652687913e-07, "loss": 0.0, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 513 }, { "completion_length": 184.35714721679688, "epoch": 0.35969209237228833, "grad_norm": 0.007139692083001137, "kl": 0.039707526564598083, "learning_rate": 2.853308699678061e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 514 }, { "completion_length": 206.71429443359375, "epoch": 0.36039188243526943, "grad_norm": 0.8990456461906433, "kl": 0.03507751226425171, "learning_rate": 2.849330009872437e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "step": 515 }, { "completion_length": 215.1428680419922, "epoch": 0.3610916724982505, "grad_norm": 1.0554438829421997, "kl": 0.046111151576042175, "learning_rate": 2.845347215081738e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "step": 516 }, { "completion_length": 194.42857360839844, "epoch": 0.3617914625612316, "grad_norm": 1.4664751291275024, "kl": 0.04037764295935631, "learning_rate": 2.8413603345556234e-07, "loss": 0.0, "reward": 0.3571428656578064, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.3571428656578064, "step": 517 }, { "completion_length": 171.21429443359375, "epoch": 0.3624912526242127, "grad_norm": 1.1242681741714478, "kl": 0.0358518548309803, "learning_rate": 2.837369387563499e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "step": 518 }, { "completion_length": 217.42857360839844, "epoch": 0.3631910426871938, "grad_norm": 2.206460952758789, "kl": 0.02211880125105381, "learning_rate": 2.8333743933944265e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.5714285969734192, "step": 519 }, { "completion_length": 184.21429443359375, "epoch": 0.363890832750175, "grad_norm": 0.004289484582841396, "kl": 0.02826312743127346, "learning_rate": 2.829375371357025e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "step": 520 }, { "completion_length": 176.85714721679688, "epoch": 0.3645906228131561, "grad_norm": 0.006967071909457445, "kl": 0.0346333347260952, "learning_rate": 2.8253723407793853e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "step": 521 }, { "completion_length": 197.07144165039062, "epoch": 0.36529041287613717, "grad_norm": 2.3508365154266357, "kl": 0.039042286574840546, "learning_rate": 2.8213653210089685e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.6428571939468384, "step": 522 }, { "completion_length": 174.35714721679688, "epoch": 0.36599020293911827, "grad_norm": 0.7139243483543396, "kl": 0.03926414996385574, "learning_rate": 2.8173543314125194e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 523 }, { "completion_length": 215.35714721679688, "epoch": 0.36668999300209937, "grad_norm": 0.8621992468833923, "kl": 0.019353993237018585, "learning_rate": 2.813339391375968e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 524 }, { "completion_length": 175.07144165039062, "epoch": 0.36738978306508047, "grad_norm": 2.0378146171569824, "kl": 0.044867780059576035, "learning_rate": 2.8093205203043373e-07, "loss": 0.0, "reward": 0.4285714626312256, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.4285714626312256, "step": 525 }, { "completion_length": 174.00001525878906, "epoch": 0.36808957312806156, "grad_norm": 2.014314651489258, "kl": 0.04805855080485344, "learning_rate": 2.8052977376216507e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 526 }, { "completion_length": 171.71429443359375, "epoch": 0.36878936319104266, "grad_norm": 1.53948175907135, "kl": 0.035063281655311584, "learning_rate": 2.8012710627708374e-07, "loss": 0.0, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 527 }, { "completion_length": 154.85714721679688, "epoch": 0.3694891532540238, "grad_norm": 0.8369784355163574, "kl": 0.06181350722908974, "learning_rate": 2.7972405152136376e-07, "loss": 0.0001, "reward": 0.2142857313156128, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.2142857313156128, "step": 528 }, { "completion_length": 187.2857208251953, "epoch": 0.3701889433170049, "grad_norm": 2.59834361076355, "kl": 0.03680596500635147, "learning_rate": 2.7932061144305084e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.5, "step": 529 }, { "completion_length": 184.2857208251953, "epoch": 0.370888733379986, "grad_norm": 2.813354969024658, "kl": 0.04346339777112007, "learning_rate": 2.7891678799205325e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.5714285969734192, "step": 530 }, { "completion_length": 198.71429443359375, "epoch": 0.3715885234429671, "grad_norm": 1.1584117412567139, "kl": 0.02804117649793625, "learning_rate": 2.78512583120132e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 531 }, { "completion_length": 178.50001525878906, "epoch": 0.3722883135059482, "grad_norm": 1.3236713409423828, "kl": 0.05426415801048279, "learning_rate": 2.781079987808916e-07, "loss": 0.0001, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 532 }, { "completion_length": 165.2857208251953, "epoch": 0.3729881035689293, "grad_norm": 2.2150092124938965, "kl": 0.054815713316202164, "learning_rate": 2.777030369297707e-07, "loss": 0.0001, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 533 }, { "completion_length": 185.71429443359375, "epoch": 0.3736878936319104, "grad_norm": 1.8651477098464966, "kl": 0.04314412921667099, "learning_rate": 2.772976995240325e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "step": 534 }, { "completion_length": 196.1428680419922, "epoch": 0.37438768369489156, "grad_norm": 0.00770213408395648, "kl": 0.037844039499759674, "learning_rate": 2.768919885227551e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "step": 535 }, { "completion_length": 196.85714721679688, "epoch": 0.37508747375787266, "grad_norm": 2.5110208988189697, "kl": 0.04185169190168381, "learning_rate": 2.764859058868228e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "step": 536 }, { "completion_length": 191.35714721679688, "epoch": 0.37578726382085376, "grad_norm": 1.851997971534729, "kl": 0.04296501353383064, "learning_rate": 2.7607945357891546e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "step": 537 }, { "completion_length": 170.21429443359375, "epoch": 0.37648705388383485, "grad_norm": 2.525156259536743, "kl": 0.05845579877495766, "learning_rate": 2.7567263356350016e-07, "loss": 0.0001, "reward": 0.6428571939468384, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.6428571939468384, "step": 538 }, { "completion_length": 192.57144165039062, "epoch": 0.37718684394681595, "grad_norm": 24.581480026245117, "kl": 2.964402198791504, "learning_rate": 2.752654478068208e-07, "loss": 0.003, "reward": 0.2857142984867096, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.2857142984867096, "step": 539 }, { "completion_length": 173.2857208251953, "epoch": 0.37788663400979705, "grad_norm": 1.9153430461883545, "kl": 0.056273799389600754, "learning_rate": 2.748578982768893e-07, "loss": 0.0001, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 540 }, { "completion_length": 170.57144165039062, "epoch": 0.37858642407277815, "grad_norm": 1.2073723077774048, "kl": 0.0511692650616169, "learning_rate": 2.7444998694347546e-07, "loss": 0.0001, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 541 }, { "completion_length": 184.21429443359375, "epoch": 0.37928621413575925, "grad_norm": 1.784035086631775, "kl": 0.05286607891321182, "learning_rate": 2.7404171577809803e-07, "loss": 0.0001, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 542 }, { "completion_length": 188.92857360839844, "epoch": 0.3799860041987404, "grad_norm": 2.3592467308044434, "kl": 0.03701862320303917, "learning_rate": 2.736330867540147e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "step": 543 }, { "completion_length": 210.07144165039062, "epoch": 0.3806857942617215, "grad_norm": 0.7970978617668152, "kl": 0.04706348478794098, "learning_rate": 2.732241018462129e-07, "loss": 0.0, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 544 }, { "completion_length": 193.85714721679688, "epoch": 0.3813855843247026, "grad_norm": 1.835613489151001, "kl": 0.06348719447851181, "learning_rate": 2.7281476303140013e-07, "loss": 0.0001, "reward": 0.5, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "step": 545 }, { "completion_length": 189.92857360839844, "epoch": 0.3820853743876837, "grad_norm": 3.5407755374908447, "kl": 0.05623449757695198, "learning_rate": 2.724050722879941e-07, "loss": 0.0001, "reward": 0.785714328289032, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "step": 546 }, { "completion_length": 213.42857360839844, "epoch": 0.3827851644506648, "grad_norm": 1.8167250156402588, "kl": 0.021622229367494583, "learning_rate": 2.719950315961139e-07, "loss": 0.0, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 547 }, { "completion_length": 195.07144165039062, "epoch": 0.3834849545136459, "grad_norm": 2.0906736850738525, "kl": 0.05723336338996887, "learning_rate": 2.715846429375697e-07, "loss": 0.0001, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "step": 548 }, { "completion_length": 182.21429443359375, "epoch": 0.384184744576627, "grad_norm": 3.0801124572753906, "kl": 0.056581608951091766, "learning_rate": 2.711739082958536e-07, "loss": 0.0001, "reward": 0.6428571939468384, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.6428571939468384, "step": 549 }, { "completion_length": 167.1428680419922, "epoch": 0.38488453463960814, "grad_norm": 2.2385222911834717, "kl": 0.08181693404912949, "learning_rate": 2.7076282965612963e-07, "loss": 0.0001, "reward": 0.5714285969734192, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "step": 550 }, { "completion_length": 199.6428680419922, "epoch": 0.38558432470258924, "grad_norm": 2.1039581298828125, "kl": 0.058807622641325, "learning_rate": 2.7035140900522504e-07, "loss": 0.0001, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "step": 551 }, { "completion_length": 216.50001525878906, "epoch": 0.38628411476557034, "grad_norm": 2.4044368267059326, "kl": 0.033771004527807236, "learning_rate": 2.699396483316193e-07, "loss": 0.0, "reward": 0.4285714626312256, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.4285714626312256, "step": 552 }, { "completion_length": 188.85714721679688, "epoch": 0.38698390482855144, "grad_norm": 2.4988067150115967, "kl": 0.07769946753978729, "learning_rate": 2.69527549625436e-07, "loss": 0.0001, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 553 }, { "completion_length": 181.1428680419922, "epoch": 0.38768369489153254, "grad_norm": 2.1368510723114014, "kl": 0.054260946810245514, "learning_rate": 2.691151148784321e-07, "loss": 0.0001, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 554 }, { "completion_length": 196.50001525878906, "epoch": 0.38838348495451364, "grad_norm": 2.683488130569458, "kl": 0.03385968133807182, "learning_rate": 2.687023460839887e-07, "loss": 0.0, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 555 }, { "completion_length": 216.35714721679688, "epoch": 0.38908327501749473, "grad_norm": 0.009019927121698856, "kl": 0.03483878821134567, "learning_rate": 2.6828924523710166e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 556 }, { "completion_length": 176.50001525878906, "epoch": 0.38978306508047583, "grad_norm": 1.9738969802856445, "kl": 0.0592254213988781, "learning_rate": 2.678758143343715e-07, "loss": 0.0001, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 557 }, { "completion_length": 183.7857208251953, "epoch": 0.390482855143457, "grad_norm": 1.8640234470367432, "kl": 0.052767064422369, "learning_rate": 2.674620553739941e-07, "loss": 0.0001, "reward": 0.3571428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.3571428656578064, "step": 558 }, { "completion_length": 207.50001525878906, "epoch": 0.3911826452064381, "grad_norm": 0.012400349602103233, "kl": 0.04630473628640175, "learning_rate": 2.670479703557508e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "step": 559 }, { "completion_length": 184.1428680419922, "epoch": 0.3918824352694192, "grad_norm": 2.8871028423309326, "kl": 0.056125763803720474, "learning_rate": 2.66633561280999e-07, "loss": 0.0001, "reward": 0.5714285969734192, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "step": 560 }, { "completion_length": 177.21429443359375, "epoch": 0.3925822253324003, "grad_norm": 2.44193696975708, "kl": 0.06930401176214218, "learning_rate": 2.662188301526621e-07, "loss": 0.0001, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 561 }, { "completion_length": 176.92857360839844, "epoch": 0.3932820153953814, "grad_norm": 3.4117348194122314, "kl": 0.08207827061414719, "learning_rate": 2.658037789752204e-07, "loss": 0.0001, "reward": 0.6428571939468384, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.6428571939468384, "step": 562 }, { "completion_length": 180.92857360839844, "epoch": 0.3939818054583625, "grad_norm": 4.170308589935303, "kl": 0.06983046233654022, "learning_rate": 2.653884097547006e-07, "loss": 0.0001, "reward": 0.7142857313156128, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.7142857313156128, "step": 563 }, { "completion_length": 167.0, "epoch": 0.3946815955213436, "grad_norm": 4.456571578979492, "kl": 0.09263132512569427, "learning_rate": 2.6497272449866704e-07, "loss": 0.0001, "reward": 0.7142857313156128, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.7142857313156128, "step": 564 }, { "completion_length": 182.1428680419922, "epoch": 0.39538138558432473, "grad_norm": 2.575380563735962, "kl": 0.051423076540231705, "learning_rate": 2.645567252162111e-07, "loss": 0.0001, "reward": 0.4285714626312256, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.4285714626312256, "step": 565 }, { "completion_length": 178.57144165039062, "epoch": 0.3960811756473058, "grad_norm": 2.034536123275757, "kl": 0.06636127829551697, "learning_rate": 2.641404139179422e-07, "loss": 0.0001, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 566 }, { "completion_length": 208.07144165039062, "epoch": 0.3967809657102869, "grad_norm": 1.6797178983688354, "kl": 0.05298960953950882, "learning_rate": 2.6372379261597784e-07, "loss": 0.0001, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 567 }, { "completion_length": 219.1428680419922, "epoch": 0.397480755773268, "grad_norm": 1.012208342552185, "kl": 0.02927366830408573, "learning_rate": 2.633068633239335e-07, "loss": 0.0, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 568 }, { "completion_length": 168.1428680419922, "epoch": 0.3981805458362491, "grad_norm": 4.316521644592285, "kl": 0.09156784415245056, "learning_rate": 2.6288962805691354e-07, "loss": 0.0001, "reward": 0.785714328289032, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "step": 569 }, { "completion_length": 162.2857208251953, "epoch": 0.3988803358992302, "grad_norm": 3.2122459411621094, "kl": 0.08237835764884949, "learning_rate": 2.62472088831501e-07, "loss": 0.0001, "reward": 0.785714328289032, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "step": 570 }, { "completion_length": 177.7857208251953, "epoch": 0.3995801259622113, "grad_norm": 0.022715821862220764, "kl": 0.08199223130941391, "learning_rate": 2.620542476657482e-07, "loss": 0.0001, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 571 }, { "completion_length": 167.1428680419922, "epoch": 0.4002799160251924, "grad_norm": 2.4406580924987793, "kl": 0.0790218934416771, "learning_rate": 2.616361065791665e-07, "loss": 0.0001, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 572 }, { "completion_length": 156.07144165039062, "epoch": 0.40097970608817357, "grad_norm": 0.02388128452003002, "kl": 0.09695427119731903, "learning_rate": 2.6121766759271714e-07, "loss": 0.0001, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 573 }, { "completion_length": 169.35714721679688, "epoch": 0.40167949615115467, "grad_norm": 2.7152810096740723, "kl": 0.08760807663202286, "learning_rate": 2.6079893272880096e-07, "loss": 0.0001, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "step": 574 }, { "completion_length": 172.1428680419922, "epoch": 0.40237928621413577, "grad_norm": 3.6040616035461426, "kl": 0.0899227112531662, "learning_rate": 2.60379904011249e-07, "loss": 0.0001, "reward": 0.6428571939468384, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.6428571939468384, "step": 575 }, { "completion_length": 174.35714721679688, "epoch": 0.40307907627711687, "grad_norm": 2.5446419715881348, "kl": 0.08045003563165665, "learning_rate": 2.599605834653124e-07, "loss": 0.0001, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 576 }, { "completion_length": 184.42857360839844, "epoch": 0.40377886634009796, "grad_norm": 2.3170862197875977, "kl": 0.0715753585100174, "learning_rate": 2.595409731176529e-07, "loss": 0.0001, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 577 }, { "completion_length": 167.92857360839844, "epoch": 0.40447865640307906, "grad_norm": 3.1105737686157227, "kl": 0.08440612256526947, "learning_rate": 2.5912107499633276e-07, "loss": 0.0001, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "step": 578 }, { "completion_length": 223.4285888671875, "epoch": 0.40517844646606016, "grad_norm": 2.328325033187866, "kl": 0.03954140096902847, "learning_rate": 2.587008911308053e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 579 }, { "completion_length": 208.42857360839844, "epoch": 0.4058782365290413, "grad_norm": 1.0618088245391846, "kl": 0.050909895449876785, "learning_rate": 2.582804235519047e-07, "loss": 0.0001, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 580 }, { "completion_length": 193.2857208251953, "epoch": 0.4065780265920224, "grad_norm": 1.4221429824829102, "kl": 0.03781288117170334, "learning_rate": 2.578596742918365e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "step": 581 }, { "completion_length": 165.0, "epoch": 0.4072778166550035, "grad_norm": 1.6162134408950806, "kl": 0.0694863349199295, "learning_rate": 2.5743864538416773e-07, "loss": 0.0001, "reward": 0.3571428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.3571428656578064, "step": 582 }, { "completion_length": 194.35714721679688, "epoch": 0.4079776067179846, "grad_norm": 4.4587721824646, "kl": 0.046782199293375015, "learning_rate": 2.570173388638169e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.5050762891769409, "rewards/check_gptzero_func": 0.6428571939468384, "step": 583 }, { "completion_length": 198.92857360839844, "epoch": 0.4086773967809657, "grad_norm": 1.545332431793213, "kl": 0.05908704176545143, "learning_rate": 2.565957567670442e-07, "loss": 0.0001, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 584 }, { "completion_length": 187.57144165039062, "epoch": 0.4093771868439468, "grad_norm": 3.9582760334014893, "kl": 0.05953739210963249, "learning_rate": 2.5617390113144195e-07, "loss": 0.0001, "reward": 0.7142857313156128, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.7142857313156128, "step": 585 }, { "completion_length": 187.7857208251953, "epoch": 0.4100769769069279, "grad_norm": 0.00880065280944109, "kl": 0.046849362552165985, "learning_rate": 2.557517739959244e-07, "loss": 0.0, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 586 }, { "completion_length": 194.2857208251953, "epoch": 0.410776766969909, "grad_norm": 3.71048903465271, "kl": 0.044254012405872345, "learning_rate": 2.553293774007181e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.7142857313156128, "step": 587 }, { "completion_length": 175.07144165039062, "epoch": 0.41147655703289016, "grad_norm": 2.1399154663085938, "kl": 0.058250073343515396, "learning_rate": 2.5490671338735175e-07, "loss": 0.0001, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 588 }, { "completion_length": 224.71429443359375, "epoch": 0.41217634709587125, "grad_norm": 1.0939371585845947, "kl": 0.02916034124791622, "learning_rate": 2.544837839986468e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 589 }, { "completion_length": 147.7857208251953, "epoch": 0.41287613715885235, "grad_norm": 2.57330060005188, "kl": 0.07541820406913757, "learning_rate": 2.5406059127870726e-07, "loss": 0.0001, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "step": 590 }, { "completion_length": 158.71429443359375, "epoch": 0.41357592722183345, "grad_norm": 2.0319535732269287, "kl": 0.06947360932826996, "learning_rate": 2.536371372729097e-07, "loss": 0.0001, "reward": 0.5714285969734192, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "step": 591 }, { "completion_length": 173.07144165039062, "epoch": 0.41427571728481455, "grad_norm": 2.184640884399414, "kl": 0.05149083212018013, "learning_rate": 2.532134240278937e-07, "loss": 0.0001, "reward": 0.5714285969734192, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "step": 592 }, { "completion_length": 186.6428680419922, "epoch": 0.41497550734779565, "grad_norm": 0.007053438574075699, "kl": 0.04499982297420502, "learning_rate": 2.5278945359155177e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "step": 593 }, { "completion_length": 154.71429443359375, "epoch": 0.41567529741077675, "grad_norm": 2.4145848751068115, "kl": 0.05270431935787201, "learning_rate": 2.523652280130194e-07, "loss": 0.0001, "reward": 0.5, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.5, "step": 594 }, { "completion_length": 139.6428680419922, "epoch": 0.4163750874737579, "grad_norm": 1.4822930097579956, "kl": 0.06764822453260422, "learning_rate": 2.5194074934266536e-07, "loss": 0.0001, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 595 }, { "completion_length": 154.21429443359375, "epoch": 0.417074877536739, "grad_norm": 2.3014943599700928, "kl": 0.06624924391508102, "learning_rate": 2.515160196320815e-07, "loss": 0.0001, "reward": 0.5714285969734192, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "step": 596 }, { "completion_length": 179.42857360839844, "epoch": 0.4177746675997201, "grad_norm": 1.316183090209961, "kl": 0.04376129060983658, "learning_rate": 2.510910409340732e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 597 }, { "completion_length": 190.1428680419922, "epoch": 0.4184744576627012, "grad_norm": 2.9795424938201904, "kl": 0.05925116315484047, "learning_rate": 2.5066581530264897e-07, "loss": 0.0001, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 598 }, { "completion_length": 181.2857208251953, "epoch": 0.4191742477256823, "grad_norm": 3.705047130584717, "kl": 0.05754886567592621, "learning_rate": 2.5024034479301117e-07, "loss": 0.0001, "reward": 0.6428571939468384, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.6428571939468384, "step": 599 }, { "completion_length": 212.21429443359375, "epoch": 0.4198740377886634, "grad_norm": 1.580612301826477, "kl": 0.030571185052394867, "learning_rate": 2.498146314615454e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 600 }, { "completion_length": 178.42857360839844, "epoch": 0.4205738278516445, "grad_norm": 0.00882403552532196, "kl": 0.05023515596985817, "learning_rate": 2.493886773658111e-07, "loss": 0.0001, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 601 }, { "completion_length": 154.57144165039062, "epoch": 0.4212736179146256, "grad_norm": 1.8183329105377197, "kl": 0.051789525896310806, "learning_rate": 2.48962484564531e-07, "loss": 0.0001, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 602 }, { "completion_length": 204.92857360839844, "epoch": 0.42197340797760674, "grad_norm": 1.2268946170806885, "kl": 0.04350917041301727, "learning_rate": 2.485360551175819e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 603 }, { "completion_length": 153.57144165039062, "epoch": 0.42267319804058784, "grad_norm": 2.0239200592041016, "kl": 0.05288690701127052, "learning_rate": 2.481093910859844e-07, "loss": 0.0001, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 604 }, { "completion_length": 158.85714721679688, "epoch": 0.42337298810356894, "grad_norm": 1.8971515893936157, "kl": 0.06349381804466248, "learning_rate": 2.4768249453189254e-07, "loss": 0.0001, "reward": 0.5, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.5, "step": 605 }, { "completion_length": 190.92857360839844, "epoch": 0.42407277816655004, "grad_norm": 2.755600690841675, "kl": 0.03697388619184494, "learning_rate": 2.4725536751858447e-07, "loss": 0.0, "reward": 0.785714328289032, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "step": 606 }, { "completion_length": 181.57144165039062, "epoch": 0.42477256822953113, "grad_norm": 0.006492472253739834, "kl": 0.043367899954319, "learning_rate": 2.468280121104521e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 607 }, { "completion_length": 217.00001525878906, "epoch": 0.42547235829251223, "grad_norm": 1.355871558189392, "kl": 0.0219882819801569, "learning_rate": 2.4640043037299134e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "step": 608 }, { "completion_length": 214.07144165039062, "epoch": 0.42617214835549333, "grad_norm": 0.8965709805488586, "kl": 0.050184134393930435, "learning_rate": 2.4597262437279166e-07, "loss": 0.0001, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 609 }, { "completion_length": 220.21429443359375, "epoch": 0.4268719384184745, "grad_norm": 1.6475117206573486, "kl": 0.032147444784641266, "learning_rate": 2.455445961775269e-07, "loss": 0.0, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 610 }, { "completion_length": 191.1428680419922, "epoch": 0.4275717284814556, "grad_norm": 3.3601479530334473, "kl": 0.03844405338168144, "learning_rate": 2.4511634785594437e-07, "loss": 0.0, "reward": 0.785714328289032, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "step": 611 }, { "completion_length": 207.85714721679688, "epoch": 0.4282715185444367, "grad_norm": 1.5599465370178223, "kl": 0.03253539651632309, "learning_rate": 2.4468788147785575e-07, "loss": 0.0, "reward": 0.785714328289032, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "step": 612 }, { "completion_length": 202.57144165039062, "epoch": 0.4289713086074178, "grad_norm": 2.3960530757904053, "kl": 0.04084521159529686, "learning_rate": 2.4425919911412615e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "step": 613 }, { "completion_length": 166.85714721679688, "epoch": 0.4296710986703989, "grad_norm": 1.580222725868225, "kl": 0.044861774891614914, "learning_rate": 2.43830302836665e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 614 }, { "completion_length": 226.9285888671875, "epoch": 0.43037088873338, "grad_norm": 2.250927209854126, "kl": 0.030000748112797737, "learning_rate": 2.4340119471841535e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.5, "step": 615 }, { "completion_length": 183.2857208251953, "epoch": 0.4310706787963611, "grad_norm": 0.007283986546099186, "kl": 0.05382065102458, "learning_rate": 2.429718768333443e-07, "loss": 0.0001, "reward": 0.7142857313156128, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "step": 616 }, { "completion_length": 172.35714721679688, "epoch": 0.43177046885934217, "grad_norm": 2.248180866241455, "kl": 0.04193298891186714, "learning_rate": 2.4254235125643256e-07, "loss": 0.0, "reward": 0.4285714626312256, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.4285714626312256, "step": 617 }, { "completion_length": 178.85714721679688, "epoch": 0.4324702589223233, "grad_norm": 0.005598884075880051, "kl": 0.039547648280858994, "learning_rate": 2.4211262006366487e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "step": 618 }, { "completion_length": 194.7857208251953, "epoch": 0.4331700489853044, "grad_norm": 0.948202908039093, "kl": 0.034319400787353516, "learning_rate": 2.4168268533201974e-07, "loss": 0.0, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 619 }, { "completion_length": 191.2857208251953, "epoch": 0.4338698390482855, "grad_norm": 1.6938560009002686, "kl": 0.04942072555422783, "learning_rate": 2.412525491394593e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "step": 620 }, { "completion_length": 156.0, "epoch": 0.4345696291112666, "grad_norm": 1.8129706382751465, "kl": 0.048300568014383316, "learning_rate": 2.4082221356491945e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "step": 621 }, { "completion_length": 187.7857208251953, "epoch": 0.4352694191742477, "grad_norm": 1.6512104272842407, "kl": 0.03514597937464714, "learning_rate": 2.403916806882998e-07, "loss": 0.0, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 622 }, { "completion_length": 227.57144165039062, "epoch": 0.4359692092372288, "grad_norm": 1.0237085819244385, "kl": 0.02763524278998375, "learning_rate": 2.399609525904536e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 623 }, { "completion_length": 179.35714721679688, "epoch": 0.4366689993002099, "grad_norm": 2.3410961627960205, "kl": 0.06135363504290581, "learning_rate": 2.3953003135317725e-07, "loss": 0.0001, "reward": 0.3571428656578064, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.3571428656578064, "step": 624 }, { "completion_length": 164.5, "epoch": 0.43736878936319107, "grad_norm": 2.9406211376190186, "kl": 0.05737534910440445, "learning_rate": 2.3909891905920116e-07, "loss": 0.0001, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 625 }, { "completion_length": 230.57144165039062, "epoch": 0.43806857942617217, "grad_norm": 1.5693167448043823, "kl": 0.03000093251466751, "learning_rate": 2.386676177921789e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "step": 626 }, { "completion_length": 174.6428680419922, "epoch": 0.43876836948915326, "grad_norm": 0.006668443791568279, "kl": 0.0467531643807888, "learning_rate": 2.3823612963667748e-07, "loss": 0.0, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 627 }, { "completion_length": 195.6428680419922, "epoch": 0.43946815955213436, "grad_norm": 1.811854600906372, "kl": 0.034590449184179306, "learning_rate": 2.3780445667816697e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "step": 628 }, { "completion_length": 201.71429443359375, "epoch": 0.44016794961511546, "grad_norm": 0.00650835270062089, "kl": 0.03414328768849373, "learning_rate": 2.3737260100301086e-07, "loss": 0.0, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 629 }, { "completion_length": 217.6428680419922, "epoch": 0.44086773967809656, "grad_norm": 1.516406774520874, "kl": 0.02930428460240364, "learning_rate": 2.369405646984556e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "step": 630 }, { "completion_length": 172.35714721679688, "epoch": 0.44156752974107766, "grad_norm": 2.1428630352020264, "kl": 0.03527773916721344, "learning_rate": 2.3650834985262083e-07, "loss": 0.0, "reward": 0.785714328289032, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "step": 631 }, { "completion_length": 213.6428680419922, "epoch": 0.44226731980405876, "grad_norm": 1.0643141269683838, "kl": 0.032824594527482986, "learning_rate": 2.3607595855448893e-07, "loss": 0.0, "reward": 0.3571428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.3571428656578064, "step": 632 }, { "completion_length": 204.92857360839844, "epoch": 0.4429671098670399, "grad_norm": 2.404583215713501, "kl": 0.027230154722929, "learning_rate": 2.3564339289389513e-07, "loss": 0.0, "reward": 0.4285714626312256, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.4285714626312256, "step": 633 }, { "completion_length": 169.92857360839844, "epoch": 0.443666899930021, "grad_norm": 1.9238661527633667, "kl": 0.041090719401836395, "learning_rate": 2.3521065496151765e-07, "loss": 0.0, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 634 }, { "completion_length": 210.1428680419922, "epoch": 0.4443666899930021, "grad_norm": 0.8436352610588074, "kl": 0.0280954260379076, "learning_rate": 2.347777468488669e-07, "loss": 0.0, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 635 }, { "completion_length": 216.57144165039062, "epoch": 0.4450664800559832, "grad_norm": 0.0065932204015553, "kl": 0.02991357073187828, "learning_rate": 2.3434467064827616e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "step": 636 }, { "completion_length": 197.21429443359375, "epoch": 0.4457662701189643, "grad_norm": 1.2168712615966797, "kl": 0.036863137036561966, "learning_rate": 2.3391142845289097e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "step": 637 }, { "completion_length": 159.71429443359375, "epoch": 0.4464660601819454, "grad_norm": 0.009670078754425049, "kl": 0.05564764887094498, "learning_rate": 2.334780223566592e-07, "loss": 0.0001, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 638 }, { "completion_length": 166.7857208251953, "epoch": 0.4471658502449265, "grad_norm": 2.02837872505188, "kl": 0.050884928554296494, "learning_rate": 2.3304445445432077e-07, "loss": 0.0001, "reward": 0.6428571939468384, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.6428571939468384, "step": 639 }, { "completion_length": 171.85714721679688, "epoch": 0.44786564030790765, "grad_norm": 0.010904277674853802, "kl": 0.05013133957982063, "learning_rate": 2.3261072684139785e-07, "loss": 0.0001, "reward": 0.7142857313156128, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "step": 640 }, { "completion_length": 187.6428680419922, "epoch": 0.44856543037088875, "grad_norm": 0.007786628790199757, "kl": 0.044351689517498016, "learning_rate": 2.3217684161418436e-07, "loss": 0.0, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 641 }, { "completion_length": 147.42857360839844, "epoch": 0.44926522043386985, "grad_norm": 2.2840495109558105, "kl": 0.04322770982980728, "learning_rate": 2.3174280086973605e-07, "loss": 0.0, "reward": 0.5, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.5, "step": 642 }, { "completion_length": 198.85714721679688, "epoch": 0.44996501049685095, "grad_norm": 1.339922308921814, "kl": 0.04232291132211685, "learning_rate": 2.3130860670586032e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "step": 643 }, { "completion_length": 198.71429443359375, "epoch": 0.45066480055983205, "grad_norm": 1.6301075220108032, "kl": 0.02942308969795704, "learning_rate": 2.308742612211061e-07, "loss": 0.0, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 644 }, { "completion_length": 161.07144165039062, "epoch": 0.45136459062281314, "grad_norm": 1.520746111869812, "kl": 0.048625752329826355, "learning_rate": 2.3043976651475366e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "step": 645 }, { "completion_length": 206.71429443359375, "epoch": 0.45206438068579424, "grad_norm": 1.8411654233932495, "kl": 0.03329163417220116, "learning_rate": 2.300051246868044e-07, "loss": 0.0, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 646 }, { "completion_length": 196.07144165039062, "epoch": 0.45276417074877534, "grad_norm": 1.7877939939498901, "kl": 0.0342370830476284, "learning_rate": 2.2957033783797098e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "step": 647 }, { "completion_length": 167.57144165039062, "epoch": 0.4534639608117565, "grad_norm": 1.2771666049957275, "kl": 0.04394586384296417, "learning_rate": 2.2913540806966676e-07, "loss": 0.0, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 648 }, { "completion_length": 172.92857360839844, "epoch": 0.4541637508747376, "grad_norm": 1.193971872329712, "kl": 0.046529728919267654, "learning_rate": 2.2870033748399612e-07, "loss": 0.0, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 649 }, { "completion_length": 178.1428680419922, "epoch": 0.4548635409377187, "grad_norm": 2.165350914001465, "kl": 0.048747967928647995, "learning_rate": 2.2826512818374381e-07, "loss": 0.0, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 650 }, { "completion_length": 198.71429443359375, "epoch": 0.4555633310006998, "grad_norm": 2.628448247909546, "kl": 0.053916797041893005, "learning_rate": 2.278297822723651e-07, "loss": 0.0001, "reward": 0.6428571939468384, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.6428571939468384, "step": 651 }, { "completion_length": 169.35714721679688, "epoch": 0.4562631210636809, "grad_norm": 2.885871648788452, "kl": 0.0751948207616806, "learning_rate": 2.273943018539755e-07, "loss": 0.0001, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 652 }, { "completion_length": 162.57144165039062, "epoch": 0.456962911126662, "grad_norm": 0.007941286079585552, "kl": 0.047953445464372635, "learning_rate": 2.2695868903334072e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "step": 653 }, { "completion_length": 192.6428680419922, "epoch": 0.4576627011896431, "grad_norm": 0.00795914325863123, "kl": 0.048438169062137604, "learning_rate": 2.2652294591586621e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "step": 654 }, { "completion_length": 194.50001525878906, "epoch": 0.45836249125262424, "grad_norm": 1.705178141593933, "kl": 0.03709828481078148, "learning_rate": 2.260870746075874e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "step": 655 }, { "completion_length": 140.57144165039062, "epoch": 0.45906228131560534, "grad_norm": 1.660890817642212, "kl": 0.0642286092042923, "learning_rate": 2.256510772151591e-07, "loss": 0.0001, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 656 }, { "completion_length": 199.2857208251953, "epoch": 0.45976207137858643, "grad_norm": 0.007773585617542267, "kl": 0.045362599194049835, "learning_rate": 2.2521495584584564e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 657 }, { "completion_length": 224.35714721679688, "epoch": 0.46046186144156753, "grad_norm": 0.006898650899529457, "kl": 0.03265133500099182, "learning_rate": 2.2477871260751047e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "step": 658 }, { "completion_length": 217.00001525878906, "epoch": 0.46116165150454863, "grad_norm": 2.767052173614502, "kl": 0.04190690070390701, "learning_rate": 2.2434234960860604e-07, "loss": 0.0, "reward": 0.4285714626312256, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.4285714626312256, "step": 659 }, { "completion_length": 182.21429443359375, "epoch": 0.46186144156752973, "grad_norm": 1.0015095472335815, "kl": 0.04846643656492233, "learning_rate": 2.239058689581638e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 660 }, { "completion_length": 194.6428680419922, "epoch": 0.4625612316305108, "grad_norm": 2.769559383392334, "kl": 0.04055805504322052, "learning_rate": 2.234692727657836e-07, "loss": 0.0, "reward": 0.785714328289032, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "step": 661 }, { "completion_length": 128.1428680419922, "epoch": 0.4632610216934919, "grad_norm": 1.6715753078460693, "kl": 0.07706505060195923, "learning_rate": 2.230325631416239e-07, "loss": 0.0001, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 662 }, { "completion_length": 197.57144165039062, "epoch": 0.4639608117564731, "grad_norm": 2.4168155193328857, "kl": 0.04533996060490608, "learning_rate": 2.2259574219639124e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "step": 663 }, { "completion_length": 194.35714721679688, "epoch": 0.4646606018194542, "grad_norm": 0.8889102935791016, "kl": 0.03331875056028366, "learning_rate": 2.2215881204133047e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "step": 664 }, { "completion_length": 181.6428680419922, "epoch": 0.4653603918824353, "grad_norm": 1.8534014225006104, "kl": 0.03304433822631836, "learning_rate": 2.2172177478821395e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "step": 665 }, { "completion_length": 222.4285888671875, "epoch": 0.4660601819454164, "grad_norm": 1.8191381692886353, "kl": 0.028013775125145912, "learning_rate": 2.2128463254933186e-07, "loss": 0.0, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 666 }, { "completion_length": 193.00001525878906, "epoch": 0.4667599720083975, "grad_norm": 1.7169095277786255, "kl": 0.022591177374124527, "learning_rate": 2.2084738743748174e-07, "loss": 0.0, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "step": 667 }, { "completion_length": 189.7857208251953, "epoch": 0.46745976207137857, "grad_norm": 0.011743742041289806, "kl": 0.05124860256910324, "learning_rate": 2.2041004156595845e-07, "loss": 0.0001, "reward": 0.4285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.4285714626312256, "step": 668 }, { "completion_length": 194.7857208251953, "epoch": 0.46815955213435967, "grad_norm": 1.3765090703964233, "kl": 0.04434319958090782, "learning_rate": 2.199725970485436e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "step": 669 }, { "completion_length": 147.42857360839844, "epoch": 0.4688593421973408, "grad_norm": 1.9276536703109741, "kl": 0.06591746211051941, "learning_rate": 2.1953505599949573e-07, "loss": 0.0001, "reward": 0.6428571939468384, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.6428571939468384, "step": 670 }, { "completion_length": 175.21429443359375, "epoch": 0.4695591322603219, "grad_norm": 2.328869581222534, "kl": 0.06332084536552429, "learning_rate": 2.1909742053354003e-07, "loss": 0.0001, "reward": 0.5, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.5, "step": 671 }, { "completion_length": 211.07144165039062, "epoch": 0.470258922323303, "grad_norm": 0.9379087686538696, "kl": 0.03213494271039963, "learning_rate": 2.1865969276585786e-07, "loss": 0.0, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 672 }, { "completion_length": 167.35714721679688, "epoch": 0.4709587123862841, "grad_norm": 0.009628983214497566, "kl": 0.06781083345413208, "learning_rate": 2.1822187481207672e-07, "loss": 0.0001, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 673 }, { "completion_length": 186.1428680419922, "epoch": 0.4716585024492652, "grad_norm": 1.3198316097259521, "kl": 0.0385563038289547, "learning_rate": 2.1778396878826006e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 674 }, { "completion_length": 172.92857360839844, "epoch": 0.4723582925122463, "grad_norm": 0.007041162345558405, "kl": 0.052339326590299606, "learning_rate": 2.17345976810897e-07, "loss": 0.0001, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 675 }, { "completion_length": 214.00001525878906, "epoch": 0.4730580825752274, "grad_norm": 1.0516942739486694, "kl": 0.03277459740638733, "learning_rate": 2.1690790099689193e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 676 }, { "completion_length": 212.2857208251953, "epoch": 0.4737578726382085, "grad_norm": 0.0056027439422905445, "kl": 0.038574155420064926, "learning_rate": 2.164697434635547e-07, "loss": 0.0, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 677 }, { "completion_length": 171.6428680419922, "epoch": 0.47445766270118966, "grad_norm": 1.6738466024398804, "kl": 0.05795716121792793, "learning_rate": 2.1603150632858983e-07, "loss": 0.0001, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 678 }, { "completion_length": 172.07144165039062, "epoch": 0.47515745276417076, "grad_norm": 1.538047432899475, "kl": 0.040635015815496445, "learning_rate": 2.1559319171008696e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "step": 679 }, { "completion_length": 197.50001525878906, "epoch": 0.47585724282715186, "grad_norm": 1.2881625890731812, "kl": 0.03642135113477707, "learning_rate": 2.151548017265098e-07, "loss": 0.0, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 680 }, { "completion_length": 214.2857208251953, "epoch": 0.47655703289013296, "grad_norm": 0.8933217525482178, "kl": 0.031822219491004944, "learning_rate": 2.1471633849668663e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 681 }, { "completion_length": 199.21429443359375, "epoch": 0.47725682295311406, "grad_norm": 1.5765355825424194, "kl": 0.032428424805402756, "learning_rate": 2.142778041397995e-07, "loss": 0.0, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 682 }, { "completion_length": 168.85714721679688, "epoch": 0.47795661301609516, "grad_norm": 1.352317214012146, "kl": 0.05361265316605568, "learning_rate": 2.1383920077537443e-07, "loss": 0.0001, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 683 }, { "completion_length": 166.71429443359375, "epoch": 0.47865640307907625, "grad_norm": 1.072190761566162, "kl": 0.04521774500608444, "learning_rate": 2.1340053052327084e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 684 }, { "completion_length": 223.6428680419922, "epoch": 0.4793561931420574, "grad_norm": 1.0753237009048462, "kl": 0.0425286665558815, "learning_rate": 2.1296179550367151e-07, "loss": 0.0, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 685 }, { "completion_length": 193.00001525878906, "epoch": 0.4800559832050385, "grad_norm": 1.2362627983093262, "kl": 0.044981617480516434, "learning_rate": 2.125229978370723e-07, "loss": 0.0, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 686 }, { "completion_length": 197.71429443359375, "epoch": 0.4807557732680196, "grad_norm": 0.008858474902808666, "kl": 0.05235905200242996, "learning_rate": 2.1208413964427167e-07, "loss": 0.0001, "reward": 0.7142857313156128, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "step": 687 }, { "completion_length": 170.7857208251953, "epoch": 0.4814555633310007, "grad_norm": 0.9643062949180603, "kl": 0.057162944227457047, "learning_rate": 2.116452230463608e-07, "loss": 0.0001, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 688 }, { "completion_length": 161.5, "epoch": 0.4821553533939818, "grad_norm": 2.4657928943634033, "kl": 0.073735311627388, "learning_rate": 2.11206250164713e-07, "loss": 0.0001, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 689 }, { "completion_length": 161.5, "epoch": 0.4828551434569629, "grad_norm": 0.010531275533139706, "kl": 0.06991841644048691, "learning_rate": 2.107672231209738e-07, "loss": 0.0001, "reward": 0.7142857313156128, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "step": 690 }, { "completion_length": 180.6428680419922, "epoch": 0.483554933519944, "grad_norm": 1.2783242464065552, "kl": 0.05982107296586037, "learning_rate": 2.1032814403705025e-07, "loss": 0.0001, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 691 }, { "completion_length": 218.00001525878906, "epoch": 0.4842547235829251, "grad_norm": 0.006291645113378763, "kl": 0.03857533633708954, "learning_rate": 2.098890150351013e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "step": 692 }, { "completion_length": 185.50001525878906, "epoch": 0.48495451364590625, "grad_norm": 0.006465044338256121, "kl": 0.04398878663778305, "learning_rate": 2.0944983823752663e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 693 }, { "completion_length": 194.00001525878906, "epoch": 0.48565430370888735, "grad_norm": 1.1355234384536743, "kl": 0.047919515520334244, "learning_rate": 2.0901061576695752e-07, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 694 }, { "completion_length": 182.6428680419922, "epoch": 0.48635409377186845, "grad_norm": 0.007984691299498081, "kl": 0.05330199375748634, "learning_rate": 2.0857134974624557e-07, "loss": 0.0001, "reward": 0.7142857313156128, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "step": 695 }, { "completion_length": 199.00001525878906, "epoch": 0.48705388383484954, "grad_norm": 1.6554975509643555, "kl": 0.03912676125764847, "learning_rate": 2.0813204229845298e-07, "loss": 0.0, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 696 }, { "completion_length": 170.6428680419922, "epoch": 0.48775367389783064, "grad_norm": 2.764085054397583, "kl": 0.06136271730065346, "learning_rate": 2.0769269554684228e-07, "loss": 0.0001, "reward": 0.6428571939468384, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.6428571939468384, "step": 697 }, { "completion_length": 209.6428680419922, "epoch": 0.48845346396081174, "grad_norm": 1.0625824928283691, "kl": 0.03584383800625801, "learning_rate": 2.0725331161486577e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 698 }, { "completion_length": 203.07144165039062, "epoch": 0.48915325402379284, "grad_norm": 0.005635551642626524, "kl": 0.03456791117787361, "learning_rate": 2.068138926261557e-07, "loss": 0.0, "reward": 0.5714285969734192, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "step": 699 }, { "completion_length": 155.07144165039062, "epoch": 0.489853044086774, "grad_norm": 1.5510985851287842, "kl": 0.06770531088113785, "learning_rate": 2.063744407045134e-07, "loss": 0.0001, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 700 } ], "logging_steps": 1, "max_steps": 1429, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }