diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,8433 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.489853044086774, + "eval_steps": 500, + "global_step": 700, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "completion_length": 162.42857360839844, + "epoch": 0.0006997900629811056, + "grad_norm": 2.1621668338775635, + "kl": 0.0, + "learning_rate": 3.9999951667961485e-07, + "loss": -0.0, + "reward": 0.2857142984867096, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.2857142984867096, + "step": 1 + }, + { + "completion_length": 162.42857360839844, + "epoch": 0.0013995801259622112, + "grad_norm": 2.071769952774048, + "kl": 0.0005087637691758573, + "learning_rate": 3.9999806672079545e-07, + "loss": 0.0, + "reward": 0.1428571492433548, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.1428571492433548, + "step": 2 + }, + { + "completion_length": 151.2857208251953, + "epoch": 0.002099370188943317, + "grad_norm": 1.5806993246078491, + "kl": 0.0004339259467087686, + "learning_rate": 3.999956501305496e-07, + "loss": 0.0, + "reward": 0.2142857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.2142857313156128, + "step": 3 + }, + { + "completion_length": 185.35714721679688, + "epoch": 0.0027991602519244225, + "grad_norm": 3.6436660289764404, + "kl": 0.0005974674131721258, + "learning_rate": 3.9999226692055735e-07, + "loss": 0.0, + "reward": 0.4285714626312256, + "reward_std": 0.4040610194206238, + "rewards/check_gptzero_func": 0.4285714626312256, + "step": 4 + }, + { + "completion_length": 205.42857360839844, + "epoch": 0.0034989503149055285, + "grad_norm": 2.561729669570923, + "kl": 0.0006411899230442941, + "learning_rate": 3.9998791710717035e-07, + "loss": 0.0, + "reward": 0.2857142984867096, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.2857142984867096, + "step": 5 + }, + { + "completion_length": 167.07144165039062, + "epoch": 0.004198740377886634, + "grad_norm": 2.539142370223999, + "kl": 0.0005450592143461108, + "learning_rate": 3.9998260071141214e-07, + "loss": 0.0, + "reward": 0.2857142984867096, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.2857142984867096, + "step": 6 + }, + { + "completion_length": 151.71429443359375, + "epoch": 0.00489853044086774, + "grad_norm": 3.034393548965454, + "kl": 0.0005159592255949974, + "learning_rate": 3.99976317758978e-07, + "loss": 0.0, + "reward": 0.3571428656578064, + "reward_std": 0.30304574966430664, + "rewards/check_gptzero_func": 0.3571428656578064, + "step": 7 + }, + { + "completion_length": 184.71429443359375, + "epoch": 0.005598320503848845, + "grad_norm": 1.3017289638519287, + "kl": 0.000647695385850966, + "learning_rate": 3.999690682802346e-07, + "loss": 0.0, + "reward": 0.2142857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.2142857313156128, + "step": 8 + }, + { + "completion_length": 180.00001525878906, + "epoch": 0.006298110566829951, + "grad_norm": 1.3137158155441284, + "kl": 0.0006165299564599991, + "learning_rate": 3.999608523102203e-07, + "loss": 0.0, + "reward": 0.2142857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.2142857313156128, + "step": 9 + }, + { + "completion_length": 169.85714721679688, + "epoch": 0.006997900629811057, + "grad_norm": 2.563966751098633, + "kl": 0.0006137760356068611, + "learning_rate": 3.999516698886445e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.30304574966430664, + "rewards/check_gptzero_func": 0.5, + "step": 10 + }, + { + "completion_length": 181.42857360839844, + "epoch": 0.007697690692792162, + "grad_norm": 2.386747360229492, + "kl": 0.0006042409222573042, + "learning_rate": 3.9994152105988764e-07, + "loss": 0.0, + "reward": 0.2857142984867096, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.2857142984867096, + "step": 11 + }, + { + "completion_length": 178.42857360839844, + "epoch": 0.008397480755773267, + "grad_norm": 2.401819944381714, + "kl": 0.0007052791188471019, + "learning_rate": 3.999304058730011e-07, + "loss": 0.0, + "reward": 0.3571428656578064, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.3571428656578064, + "step": 12 + }, + { + "completion_length": 154.92857360839844, + "epoch": 0.009097270818754374, + "grad_norm": 2.1253886222839355, + "kl": 0.000624538806732744, + "learning_rate": 3.99918324381707e-07, + "loss": 0.0, + "reward": 0.1428571492433548, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.1428571492433548, + "step": 13 + }, + { + "completion_length": 188.57144165039062, + "epoch": 0.00979706088173548, + "grad_norm": 2.377917528152466, + "kl": 0.0006567585514858365, + "learning_rate": 3.9990527664439747e-07, + "loss": 0.0, + "reward": 0.3571428656578064, + "reward_std": 0.30304574966430664, + "rewards/check_gptzero_func": 0.3571428656578064, + "step": 14 + }, + { + "completion_length": 143.57144165039062, + "epoch": 0.010496850944716585, + "grad_norm": 3.380934476852417, + "kl": 0.0009870762005448341, + "learning_rate": 3.998912627241349e-07, + "loss": 0.0, + "reward": 0.2857142984867096, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.2857142984867096, + "step": 15 + }, + { + "completion_length": 164.6428680419922, + "epoch": 0.01119664100769769, + "grad_norm": 2.6711244583129883, + "kl": 0.0007087168050929904, + "learning_rate": 3.998762826886515e-07, + "loss": 0.0, + "reward": 0.2142857313156128, + "reward_std": 0.30304574966430664, + "rewards/check_gptzero_func": 0.2142857313156128, + "step": 16 + }, + { + "completion_length": 153.85714721679688, + "epoch": 0.011896431070678797, + "grad_norm": 2.1802475452423096, + "kl": 0.001023497898131609, + "learning_rate": 3.9986033661034884e-07, + "loss": 0.0, + "reward": 0.1428571492433548, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.1428571492433548, + "step": 17 + }, + { + "completion_length": 184.07144165039062, + "epoch": 0.012596221133659902, + "grad_norm": 1.7185988426208496, + "kl": 0.0007585805142298341, + "learning_rate": 3.998434245662975e-07, + "loss": 0.0, + "reward": 0.3571428656578064, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.3571428656578064, + "step": 18 + }, + { + "completion_length": 154.7857208251953, + "epoch": 0.013296011196641007, + "grad_norm": 2.969214677810669, + "kl": 0.0007965491386130452, + "learning_rate": 3.9982554663823683e-07, + "loss": 0.0, + "reward": 0.2857142984867096, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.2857142984867096, + "step": 19 + }, + { + "completion_length": 176.71429443359375, + "epoch": 0.013995801259622114, + "grad_norm": 2.431178331375122, + "kl": 0.0006497688591480255, + "learning_rate": 3.998067029125746e-07, + "loss": 0.0, + "reward": 0.2142857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.2142857313156128, + "step": 20 + }, + { + "completion_length": 176.85714721679688, + "epoch": 0.01469559132260322, + "grad_norm": 1.35764741897583, + "kl": 0.0006702968385070562, + "learning_rate": 3.997868934803863e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.5, + "step": 21 + }, + { + "completion_length": 153.7857208251953, + "epoch": 0.015395381385584325, + "grad_norm": 1.6852751970291138, + "kl": 0.0009988198289647698, + "learning_rate": 3.9976611843741495e-07, + "loss": 0.0, + "reward": 0.3571428656578064, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.3571428656578064, + "step": 22 + }, + { + "completion_length": 156.1428680419922, + "epoch": 0.01609517144856543, + "grad_norm": 0.9442471265792847, + "kl": 0.0010655602673068643, + "learning_rate": 3.9974437788407063e-07, + "loss": 0.0, + "reward": 0.2142857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.2142857313156128, + "step": 23 + }, + { + "completion_length": 175.7857208251953, + "epoch": 0.016794961511546535, + "grad_norm": 2.5566701889038086, + "kl": 0.0009062529425136745, + "learning_rate": 3.9972167192542977e-07, + "loss": 0.0, + "reward": 0.2142857313156128, + "reward_std": 0.30304574966430664, + "rewards/check_gptzero_func": 0.2142857313156128, + "step": 24 + }, + { + "completion_length": 193.6428680419922, + "epoch": 0.01749475157452764, + "grad_norm": 2.0270092487335205, + "kl": 0.0006580596673302352, + "learning_rate": 3.99698000671235e-07, + "loss": 0.0, + "reward": 0.4285714626312256, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.4285714626312256, + "step": 25 + }, + { + "completion_length": 166.71429443359375, + "epoch": 0.01819454163750875, + "grad_norm": 2.980055809020996, + "kl": 0.0011716507142409682, + "learning_rate": 3.9967336423589423e-07, + "loss": 0.0, + "reward": 0.3571428656578064, + "reward_std": 0.30304574966430664, + "rewards/check_gptzero_func": 0.3571428656578064, + "step": 26 + }, + { + "completion_length": 185.1428680419922, + "epoch": 0.018894331700489854, + "grad_norm": 2.762129068374634, + "kl": 0.0011772769503295422, + "learning_rate": 3.996477627384804e-07, + "loss": 0.0, + "reward": 0.2857142984867096, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.2857142984867096, + "step": 27 + }, + { + "completion_length": 169.42857360839844, + "epoch": 0.01959412176347096, + "grad_norm": 3.0474092960357666, + "kl": 0.0014008829602971673, + "learning_rate": 3.996211963027308e-07, + "loss": 0.0, + "reward": 0.2857142984867096, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.2857142984867096, + "step": 28 + }, + { + "completion_length": 165.71429443359375, + "epoch": 0.020293911826452064, + "grad_norm": 0.002185442950576544, + "kl": 0.0014958097599446774, + "learning_rate": 3.9959366505704645e-07, + "loss": 0.0, + "reward": 0.1428571492433548, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.1428571492433548, + "step": 29 + }, + { + "completion_length": 152.07144165039062, + "epoch": 0.02099370188943317, + "grad_norm": 3.3163719177246094, + "kl": 0.0014360197819769382, + "learning_rate": 3.9956516913449133e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.30304574966430664, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 30 + }, + { + "completion_length": 156.57144165039062, + "epoch": 0.021693491952414275, + "grad_norm": 2.908538579940796, + "kl": 0.0014536671806126833, + "learning_rate": 3.9953570867279217e-07, + "loss": 0.0, + "reward": 0.3571428656578064, + "reward_std": 0.30304574966430664, + "rewards/check_gptzero_func": 0.3571428656578064, + "step": 31 + }, + { + "completion_length": 150.6428680419922, + "epoch": 0.02239328201539538, + "grad_norm": 5.655108451843262, + "kl": 0.0020901458337903023, + "learning_rate": 3.995052838143374e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.5, + "step": 32 + }, + { + "completion_length": 141.5, + "epoch": 0.02309307207837649, + "grad_norm": 3.3134255409240723, + "kl": 0.0023269259836524725, + "learning_rate": 3.994738947061765e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.30304574966430664, + "rewards/check_gptzero_func": 0.5, + "step": 33 + }, + { + "completion_length": 162.21429443359375, + "epoch": 0.023792862141357594, + "grad_norm": 2.6442935466766357, + "kl": 0.0024486398324370384, + "learning_rate": 3.994415415000195e-07, + "loss": 0.0, + "reward": 0.2142857313156128, + "reward_std": 0.30304574966430664, + "rewards/check_gptzero_func": 0.2142857313156128, + "step": 34 + }, + { + "completion_length": 152.2857208251953, + "epoch": 0.0244926522043387, + "grad_norm": 2.7008283138275146, + "kl": 0.002697949530556798, + "learning_rate": 3.994082243522359e-07, + "loss": 0.0, + "reward": 0.1428571492433548, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.1428571492433548, + "step": 35 + }, + { + "completion_length": 148.0, + "epoch": 0.025192442267319804, + "grad_norm": 3.7234046459198, + "kl": 0.003130936762318015, + "learning_rate": 3.993739434238544e-07, + "loss": 0.0, + "reward": 0.4285714626312256, + "reward_std": 0.4040610194206238, + "rewards/check_gptzero_func": 0.4285714626312256, + "step": 36 + }, + { + "completion_length": 172.21429443359375, + "epoch": 0.02589223233030091, + "grad_norm": 1.788806438446045, + "kl": 0.0019818381406366825, + "learning_rate": 3.993386988805617e-07, + "loss": 0.0, + "reward": 0.2857142984867096, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.2857142984867096, + "step": 37 + }, + { + "completion_length": 158.35714721679688, + "epoch": 0.026592022393282014, + "grad_norm": 4.708991050720215, + "kl": 0.0029942409601062536, + "learning_rate": 3.993024908927018e-07, + "loss": 0.0, + "reward": 0.4285714626312256, + "reward_std": 0.4040610194206238, + "rewards/check_gptzero_func": 0.4285714626312256, + "step": 38 + }, + { + "completion_length": 166.92857360839844, + "epoch": 0.02729181245626312, + "grad_norm": 1.553680419921875, + "kl": 0.0026708380319178104, + "learning_rate": 3.992653196352753e-07, + "loss": 0.0, + "reward": 0.3571428656578064, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.3571428656578064, + "step": 39 + }, + { + "completion_length": 154.42857360839844, + "epoch": 0.02799160251924423, + "grad_norm": 1.7119766473770142, + "kl": 0.005624197889119387, + "learning_rate": 3.992271852879386e-07, + "loss": 0.0, + "reward": 0.3571428656578064, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.3571428656578064, + "step": 40 + }, + { + "completion_length": 200.21429443359375, + "epoch": 0.028691392582225334, + "grad_norm": 1.4790523052215576, + "kl": 0.0046756877563893795, + "learning_rate": 3.991880880350026e-07, + "loss": 0.0, + "reward": 0.0714285746216774, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.0714285746216774, + "step": 41 + }, + { + "completion_length": 184.07144165039062, + "epoch": 0.02939118264520644, + "grad_norm": 1.7897188663482666, + "kl": 0.005358730908483267, + "learning_rate": 3.991480280654323e-07, + "loss": 0.0, + "reward": 0.3571428656578064, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.3571428656578064, + "step": 42 + }, + { + "completion_length": 175.35714721679688, + "epoch": 0.030090972708187544, + "grad_norm": 1.5548423528671265, + "kl": 0.007951375097036362, + "learning_rate": 3.9910700557284576e-07, + "loss": 0.0, + "reward": 0.3571428656578064, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.3571428656578064, + "step": 43 + }, + { + "completion_length": 155.6428680419922, + "epoch": 0.03079076277116865, + "grad_norm": 2.725590944290161, + "kl": 0.008790958672761917, + "learning_rate": 3.990650207555131e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.30304574966430664, + "rewards/check_gptzero_func": 0.5, + "step": 44 + }, + { + "completion_length": 192.21429443359375, + "epoch": 0.031490552834149754, + "grad_norm": 2.051744222640991, + "kl": 0.0023416366893798113, + "learning_rate": 3.990220738163554e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 45 + }, + { + "completion_length": 157.7857208251953, + "epoch": 0.03219034289713086, + "grad_norm": 2.221405267715454, + "kl": 0.007256282493472099, + "learning_rate": 3.9897816496294406e-07, + "loss": 0.0, + "reward": 0.3571428656578064, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.3571428656578064, + "step": 46 + }, + { + "completion_length": 165.92857360839844, + "epoch": 0.032890132960111965, + "grad_norm": 2.4208436012268066, + "kl": 0.009219921194016933, + "learning_rate": 3.989332944074994e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 47 + }, + { + "completion_length": 142.35714721679688, + "epoch": 0.03358992302309307, + "grad_norm": 2.5692453384399414, + "kl": 0.011153223924338818, + "learning_rate": 3.988874623668901e-07, + "loss": 0.0, + "reward": 0.4285714626312256, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.4285714626312256, + "step": 48 + }, + { + "completion_length": 172.6428680419922, + "epoch": 0.034289713086074175, + "grad_norm": 3.8478963375091553, + "kl": 0.005089076701551676, + "learning_rate": 3.988406690626316e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.30304574966430664, + "rewards/check_gptzero_func": 0.5, + "step": 49 + }, + { + "completion_length": 153.07144165039062, + "epoch": 0.03498950314905528, + "grad_norm": 1.5168204307556152, + "kl": 0.01330635230988264, + "learning_rate": 3.987929147208857e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 50 + }, + { + "completion_length": 154.35714721679688, + "epoch": 0.03568929321203639, + "grad_norm": 2.3415682315826416, + "kl": 0.018696671351790428, + "learning_rate": 3.9874419957245866e-07, + "loss": 0.0, + "reward": 0.785714328289032, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.785714328289032, + "step": 51 + }, + { + "completion_length": 168.92857360839844, + "epoch": 0.0363890832750175, + "grad_norm": 3.473439931869507, + "kl": 0.014826941303908825, + "learning_rate": 3.9869452385280085e-07, + "loss": 0.0, + "reward": 0.2142857313156128, + "reward_std": 0.30304574966430664, + "rewards/check_gptzero_func": 0.2142857313156128, + "step": 52 + }, + { + "completion_length": 148.35714721679688, + "epoch": 0.0370888733379986, + "grad_norm": 1.529994010925293, + "kl": 0.0183956827968359, + "learning_rate": 3.986438878020051e-07, + "loss": 0.0, + "reward": 0.4285714626312256, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.4285714626312256, + "step": 53 + }, + { + "completion_length": 190.7857208251953, + "epoch": 0.03778866340097971, + "grad_norm": 0.7264643907546997, + "kl": 0.010657703503966331, + "learning_rate": 3.9859229166480574e-07, + "loss": 0.0, + "reward": 0.3571428656578064, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.3571428656578064, + "step": 54 + }, + { + "completion_length": 146.1428680419922, + "epoch": 0.03848845346396081, + "grad_norm": 2.617443323135376, + "kl": 0.011645260266959667, + "learning_rate": 3.985397356905774e-07, + "loss": 0.0, + "reward": 0.4285714626312256, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.4285714626312256, + "step": 55 + }, + { + "completion_length": 152.6428680419922, + "epoch": 0.03918824352694192, + "grad_norm": 4.20578670501709, + "kl": 0.021926531568169594, + "learning_rate": 3.984862201333339e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.4040610194206238, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 56 + }, + { + "completion_length": 163.1428680419922, + "epoch": 0.03988803358992302, + "grad_norm": 3.8608505725860596, + "kl": 0.015257592312991619, + "learning_rate": 3.984317452517268e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.4040610194206238, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 57 + }, + { + "completion_length": 135.07144165039062, + "epoch": 0.04058782365290413, + "grad_norm": 1.4822739362716675, + "kl": 0.017864219844341278, + "learning_rate": 3.983763113090443e-07, + "loss": 0.0, + "reward": 0.3571428656578064, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.3571428656578064, + "step": 58 + }, + { + "completion_length": 152.42857360839844, + "epoch": 0.041287613715885234, + "grad_norm": 3.3999340534210205, + "kl": 0.019019659608602524, + "learning_rate": 3.9831991857320996e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.5050762891769409, + "rewards/check_gptzero_func": 0.5, + "step": 59 + }, + { + "completion_length": 183.35714721679688, + "epoch": 0.04198740377886634, + "grad_norm": 2.194365978240967, + "kl": 0.0204194076359272, + "learning_rate": 3.982625673167814e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 60 + }, + { + "completion_length": 146.92857360839844, + "epoch": 0.042687193841847444, + "grad_norm": 3.065058469772339, + "kl": 0.03266499191522598, + "learning_rate": 3.982042578169488e-07, + "loss": 0.0, + "reward": 0.3571428656578064, + "reward_std": 0.30304574966430664, + "rewards/check_gptzero_func": 0.3571428656578064, + "step": 61 + }, + { + "completion_length": 171.6428680419922, + "epoch": 0.04338698390482855, + "grad_norm": 1.8983697891235352, + "kl": 0.03760524466633797, + "learning_rate": 3.9814499035553407e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 62 + }, + { + "completion_length": 169.21429443359375, + "epoch": 0.044086773967809655, + "grad_norm": 0.006249432452023029, + "kl": 0.012164854444563389, + "learning_rate": 3.980847652189887e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 63 + }, + { + "completion_length": 193.92857360839844, + "epoch": 0.04478656403079076, + "grad_norm": 3.6738457679748535, + "kl": 0.01014101505279541, + "learning_rate": 3.9802358269839326e-07, + "loss": 0.0, + "reward": 0.3571428656578064, + "reward_std": 0.30304574966430664, + "rewards/check_gptzero_func": 0.3571428656578064, + "step": 64 + }, + { + "completion_length": 168.7857208251953, + "epoch": 0.04548635409377187, + "grad_norm": 1.8547803163528442, + "kl": 0.018301136791706085, + "learning_rate": 3.9796144308945525e-07, + "loss": 0.0, + "reward": 0.3571428656578064, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.3571428656578064, + "step": 65 + }, + { + "completion_length": 173.07144165039062, + "epoch": 0.04618614415675298, + "grad_norm": 2.4389915466308594, + "kl": 0.014962972141802311, + "learning_rate": 3.97898346692508e-07, + "loss": 0.0, + "reward": 0.2142857313156128, + "reward_std": 0.30304574966430664, + "rewards/check_gptzero_func": 0.2142857313156128, + "step": 66 + }, + { + "completion_length": 161.6428680419922, + "epoch": 0.04688593421973408, + "grad_norm": 2.174207925796509, + "kl": 0.021594949066638947, + "learning_rate": 3.9783429381250933e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.30304574966430664, + "rewards/check_gptzero_func": 0.5, + "step": 67 + }, + { + "completion_length": 154.2857208251953, + "epoch": 0.04758572428271519, + "grad_norm": 3.0958943367004395, + "kl": 0.029363546520471573, + "learning_rate": 3.9776928475904e-07, + "loss": 0.0, + "reward": 0.4285714626312256, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.4285714626312256, + "step": 68 + }, + { + "completion_length": 174.00001525878906, + "epoch": 0.04828551434569629, + "grad_norm": 0.006897146813571453, + "kl": 0.012384669855237007, + "learning_rate": 3.977033198463017e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 69 + }, + { + "completion_length": 158.85714721679688, + "epoch": 0.0489853044086774, + "grad_norm": 3.0664570331573486, + "kl": 0.022730212658643723, + "learning_rate": 3.976363993931166e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.4040610194206238, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 70 + }, + { + "completion_length": 182.00001525878906, + "epoch": 0.0496850944716585, + "grad_norm": 4.6477837562561035, + "kl": 0.016789477318525314, + "learning_rate": 3.975685237229247e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.5050762891769409, + "rewards/check_gptzero_func": 0.5, + "step": 71 + }, + { + "completion_length": 156.21429443359375, + "epoch": 0.05038488453463961, + "grad_norm": 1.9391226768493652, + "kl": 0.01738656684756279, + "learning_rate": 3.974996931637831e-07, + "loss": 0.0, + "reward": 0.8571429252624512, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.8571429252624512, + "step": 72 + }, + { + "completion_length": 179.1428680419922, + "epoch": 0.05108467459762071, + "grad_norm": 2.370103597640991, + "kl": 0.020562436431646347, + "learning_rate": 3.974299080483638e-07, + "loss": 0.0, + "reward": 0.4285714626312256, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.4285714626312256, + "step": 73 + }, + { + "completion_length": 146.85714721679688, + "epoch": 0.05178446466060182, + "grad_norm": 3.4054484367370605, + "kl": 0.012689548544585705, + "learning_rate": 3.9735916871395254e-07, + "loss": 0.0, + "reward": 0.4285714626312256, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.4285714626312256, + "step": 74 + }, + { + "completion_length": 168.7857208251953, + "epoch": 0.052484254723582924, + "grad_norm": 3.058872699737549, + "kl": 0.020301401615142822, + "learning_rate": 3.9728747550244695e-07, + "loss": 0.0, + "reward": 0.4285714626312256, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.4285714626312256, + "step": 75 + }, + { + "completion_length": 173.00001525878906, + "epoch": 0.05318404478656403, + "grad_norm": 2.662966251373291, + "kl": 0.013862676918506622, + "learning_rate": 3.9721482876035494e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.30304574966430664, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 76 + }, + { + "completion_length": 155.7857208251953, + "epoch": 0.053883834849545134, + "grad_norm": 2.59771728515625, + "kl": 0.01696676015853882, + "learning_rate": 3.9714122883879304e-07, + "loss": 0.0, + "reward": 0.4285714626312256, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.4285714626312256, + "step": 77 + }, + { + "completion_length": 132.35714721679688, + "epoch": 0.05458362491252624, + "grad_norm": 3.342616081237793, + "kl": 0.02708171308040619, + "learning_rate": 3.9706667609348455e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 78 + }, + { + "completion_length": 189.6428680419922, + "epoch": 0.055283414975507345, + "grad_norm": 2.427316427230835, + "kl": 0.013700015842914581, + "learning_rate": 3.969911708847582e-07, + "loss": 0.0, + "reward": 0.4285714626312256, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.4285714626312256, + "step": 79 + }, + { + "completion_length": 151.42857360839844, + "epoch": 0.05598320503848846, + "grad_norm": 2.5900068283081055, + "kl": 0.022396380081772804, + "learning_rate": 3.9691471357754615e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 80 + }, + { + "completion_length": 143.6428680419922, + "epoch": 0.05668299510146956, + "grad_norm": 2.744997024536133, + "kl": 0.03789540007710457, + "learning_rate": 3.968373045413819e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 81 + }, + { + "completion_length": 164.6428680419922, + "epoch": 0.05738278516445067, + "grad_norm": 3.3995862007141113, + "kl": 0.016367558389902115, + "learning_rate": 3.967589441503993e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.30304574966430664, + "rewards/check_gptzero_func": 0.5, + "step": 82 + }, + { + "completion_length": 171.6428680419922, + "epoch": 0.05808257522743177, + "grad_norm": 1.4792157411575317, + "kl": 0.033240318298339844, + "learning_rate": 3.9667963278333005e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 83 + }, + { + "completion_length": 191.00001525878906, + "epoch": 0.05878236529041288, + "grad_norm": 1.0607832670211792, + "kl": 0.009467127732932568, + "learning_rate": 3.965993708235021e-07, + "loss": 0.0, + "reward": 0.4285714626312256, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.4285714626312256, + "step": 84 + }, + { + "completion_length": 150.5, + "epoch": 0.05948215535339398, + "grad_norm": 2.3551836013793945, + "kl": 0.04632449522614479, + "learning_rate": 3.965181586588379e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 85 + }, + { + "completion_length": 162.21429443359375, + "epoch": 0.06018194541637509, + "grad_norm": 2.594930410385132, + "kl": 0.039581455290317535, + "learning_rate": 3.964359966818524e-07, + "loss": 0.0, + "reward": 0.9285714626312256, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.9285714626312256, + "step": 86 + }, + { + "completion_length": 149.1428680419922, + "epoch": 0.06088173547935619, + "grad_norm": 5.078446388244629, + "kl": 0.04121645539999008, + "learning_rate": 3.963528852896512e-07, + "loss": 0.0, + "reward": 0.8571429252624512, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.8571429252624512, + "step": 87 + }, + { + "completion_length": 177.42857360839844, + "epoch": 0.0615815255423373, + "grad_norm": 4.284796237945557, + "kl": 0.015902357175946236, + "learning_rate": 3.962688248839286e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.4040610194206238, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 88 + }, + { + "completion_length": 174.1428680419922, + "epoch": 0.0622813156053184, + "grad_norm": 2.436661720275879, + "kl": 0.022723663598299026, + "learning_rate": 3.961838158709656e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 89 + }, + { + "completion_length": 169.92857360839844, + "epoch": 0.06298110566829951, + "grad_norm": 3.116161823272705, + "kl": 0.02194351516664028, + "learning_rate": 3.9609785866162825e-07, + "loss": 0.0, + "reward": 0.4285714626312256, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.4285714626312256, + "step": 90 + }, + { + "completion_length": 180.00001525878906, + "epoch": 0.06368089573128062, + "grad_norm": 1.5481808185577393, + "kl": 0.01771431416273117, + "learning_rate": 3.96010953671365e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 91 + }, + { + "completion_length": 161.57144165039062, + "epoch": 0.06438068579426172, + "grad_norm": 2.0321083068847656, + "kl": 0.028998972848057747, + "learning_rate": 3.959231013202057e-07, + "loss": 0.0, + "reward": 0.3571428656578064, + "reward_std": 0.30304574966430664, + "rewards/check_gptzero_func": 0.3571428656578064, + "step": 92 + }, + { + "completion_length": 171.71429443359375, + "epoch": 0.06508047585724283, + "grad_norm": 1.2014678716659546, + "kl": 0.026206741109490395, + "learning_rate": 3.958343020327585e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.5, + "step": 93 + }, + { + "completion_length": 191.6428680419922, + "epoch": 0.06578026592022393, + "grad_norm": 2.0394017696380615, + "kl": 0.014384634792804718, + "learning_rate": 3.957445562382084e-07, + "loss": 0.0, + "reward": 0.785714328289032, + "reward_std": 0.30304574966430664, + "rewards/check_gptzero_func": 0.785714328289032, + "step": 94 + }, + { + "completion_length": 149.7857208251953, + "epoch": 0.06648005598320504, + "grad_norm": 5.294133186340332, + "kl": 0.028008148074150085, + "learning_rate": 3.9565386437031525e-07, + "loss": 0.0, + "reward": 0.8571429252624512, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.8571429252624512, + "step": 95 + }, + { + "completion_length": 174.6428680419922, + "epoch": 0.06717984604618614, + "grad_norm": 1.9654920101165771, + "kl": 0.02754833921790123, + "learning_rate": 3.955622268674113e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 96 + }, + { + "completion_length": 131.35714721679688, + "epoch": 0.06787963610916725, + "grad_norm": 0.013575117103755474, + "kl": 0.05109896883368492, + "learning_rate": 3.954696441723992e-07, + "loss": 0.0001, + "reward": 0.7142857313156128, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 97 + }, + { + "completion_length": 208.42857360839844, + "epoch": 0.06857942617214835, + "grad_norm": 0.0030638063326478004, + "kl": 0.016210131347179413, + "learning_rate": 3.9537611673275013e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 98 + }, + { + "completion_length": 183.92857360839844, + "epoch": 0.06927921623512946, + "grad_norm": 2.51125168800354, + "kl": 0.023246267810463905, + "learning_rate": 3.9528164500050114e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.30304574966430664, + "rewards/check_gptzero_func": 0.5, + "step": 99 + }, + { + "completion_length": 180.57144165039062, + "epoch": 0.06997900629811056, + "grad_norm": 2.352588415145874, + "kl": 0.0423022024333477, + "learning_rate": 3.951862294322534e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 100 + }, + { + "completion_length": 157.57144165039062, + "epoch": 0.07067879636109167, + "grad_norm": 1.5895036458969116, + "kl": 0.03580068424344063, + "learning_rate": 3.9508987048916987e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 101 + }, + { + "completion_length": 168.35714721679688, + "epoch": 0.07137858642407278, + "grad_norm": 0.005004839040338993, + "kl": 0.02044336311519146, + "learning_rate": 3.949925686369729e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 102 + }, + { + "completion_length": 188.00001525878906, + "epoch": 0.07207837648705388, + "grad_norm": 0.6585327982902527, + "kl": 0.028955884277820587, + "learning_rate": 3.948943243459422e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.5, + "step": 103 + }, + { + "completion_length": 149.71429443359375, + "epoch": 0.072778166550035, + "grad_norm": 2.492542266845703, + "kl": 0.01728636771440506, + "learning_rate": 3.947951380909125e-07, + "loss": 0.0, + "reward": 0.3571428656578064, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.3571428656578064, + "step": 104 + }, + { + "completion_length": 189.00001525878906, + "epoch": 0.0734779566130161, + "grad_norm": 0.7788301110267639, + "kl": 0.017463039606809616, + "learning_rate": 3.946950103512711e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.5, + "step": 105 + }, + { + "completion_length": 198.50001525878906, + "epoch": 0.0741777466759972, + "grad_norm": 2.385542154312134, + "kl": 0.009109357371926308, + "learning_rate": 3.945939416109558e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 106 + }, + { + "completion_length": 168.0, + "epoch": 0.0748775367389783, + "grad_norm": 0.005704585462808609, + "kl": 0.03086216188967228, + "learning_rate": 3.944919323584525e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 107 + }, + { + "completion_length": 179.1428680419922, + "epoch": 0.07557732680195942, + "grad_norm": 1.7662243843078613, + "kl": 0.022440915927290916, + "learning_rate": 3.943889830867926e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.5, + "step": 108 + }, + { + "completion_length": 159.5, + "epoch": 0.07627711686494051, + "grad_norm": 2.5273375511169434, + "kl": 0.03617139905691147, + "learning_rate": 3.94285094293551e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 109 + }, + { + "completion_length": 212.21429443359375, + "epoch": 0.07697690692792163, + "grad_norm": 0.013249583542346954, + "kl": 0.021984193474054337, + "learning_rate": 3.941802664808434e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 110 + }, + { + "completion_length": 175.85714721679688, + "epoch": 0.07767669699090272, + "grad_norm": 4.031686782836914, + "kl": 0.032846599817276, + "learning_rate": 3.94074500155324e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 111 + }, + { + "completion_length": 187.07144165039062, + "epoch": 0.07837648705388384, + "grad_norm": 0.007363871671259403, + "kl": 0.024431224912405014, + "learning_rate": 3.9396779582818294e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 112 + }, + { + "completion_length": 198.07144165039062, + "epoch": 0.07907627711686493, + "grad_norm": 4.20402717590332, + "kl": 0.015820473432540894, + "learning_rate": 3.9386015401514403e-07, + "loss": 0.0, + "reward": 0.3571428656578064, + "reward_std": 0.30304574966430664, + "rewards/check_gptzero_func": 0.3571428656578064, + "step": 113 + }, + { + "completion_length": 196.21429443359375, + "epoch": 0.07977606717984605, + "grad_norm": 3.4518990516662598, + "kl": 0.04174185171723366, + "learning_rate": 3.937515752364621e-07, + "loss": 0.0, + "reward": 0.8571429252624512, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.8571429252624512, + "step": 114 + }, + { + "completion_length": 185.6428680419922, + "epoch": 0.08047585724282715, + "grad_norm": 0.0049303495325148106, + "kl": 0.026473047211766243, + "learning_rate": 3.936420600169205e-07, + "loss": 0.0, + "reward": 0.4285714626312256, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.4285714626312256, + "step": 115 + }, + { + "completion_length": 164.2857208251953, + "epoch": 0.08117564730580826, + "grad_norm": 0.13479961454868317, + "kl": 0.020081352442502975, + "learning_rate": 3.935316088858287e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.5, + "step": 116 + }, + { + "completion_length": 180.7857208251953, + "epoch": 0.08187543736878937, + "grad_norm": 0.0093807028606534, + "kl": 0.03584766387939453, + "learning_rate": 3.9342022237701944e-07, + "loss": 0.0, + "reward": 0.8571429252624512, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.8571429252624512, + "step": 117 + }, + { + "completion_length": 185.00001525878906, + "epoch": 0.08257522743177047, + "grad_norm": 0.01810525357723236, + "kl": 0.04101763665676117, + "learning_rate": 3.933079010288464e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 118 + }, + { + "completion_length": 186.6428680419922, + "epoch": 0.08327501749475158, + "grad_norm": 0.00445236312225461, + "kl": 0.021929247304797173, + "learning_rate": 3.931946453841817e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 119 + }, + { + "completion_length": 187.57144165039062, + "epoch": 0.08397480755773268, + "grad_norm": 2.281690835952759, + "kl": 0.017530912533402443, + "learning_rate": 3.9308045599041273e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 120 + }, + { + "completion_length": 210.92857360839844, + "epoch": 0.08467459762071379, + "grad_norm": 0.003424593713134527, + "kl": 0.008928514085710049, + "learning_rate": 3.9296533339944037e-07, + "loss": 0.0, + "reward": 0.2857142984867096, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.2857142984867096, + "step": 121 + }, + { + "completion_length": 184.21429443359375, + "epoch": 0.08537438768369489, + "grad_norm": 2.1026535034179688, + "kl": 0.020458171144127846, + "learning_rate": 3.928492781676753e-07, + "loss": 0.0, + "reward": 0.4285714626312256, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.4285714626312256, + "step": 122 + }, + { + "completion_length": 199.07144165039062, + "epoch": 0.086074177746676, + "grad_norm": 1.0709642171859741, + "kl": 0.010702777653932571, + "learning_rate": 3.927322908560363e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 123 + }, + { + "completion_length": 170.6428680419922, + "epoch": 0.0867739678096571, + "grad_norm": 1.365310549736023, + "kl": 0.025576891377568245, + "learning_rate": 3.926143720299469e-07, + "loss": 0.0, + "reward": 0.3571428656578064, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.3571428656578064, + "step": 124 + }, + { + "completion_length": 205.85714721679688, + "epoch": 0.08747375787263821, + "grad_norm": 2.0331332683563232, + "kl": 0.01353074237704277, + "learning_rate": 3.9249552225933275e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.5, + "step": 125 + }, + { + "completion_length": 193.57144165039062, + "epoch": 0.08817354793561931, + "grad_norm": 1.3904906511306763, + "kl": 0.026216160506010056, + "learning_rate": 3.92375742118619e-07, + "loss": 0.0, + "reward": 0.3571428656578064, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.3571428656578064, + "step": 126 + }, + { + "completion_length": 141.1428680419922, + "epoch": 0.08887333799860042, + "grad_norm": 0.0112815722823143, + "kl": 0.025340013206005096, + "learning_rate": 3.922550321867275e-07, + "loss": 0.0, + "reward": 0.2857142984867096, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.2857142984867096, + "step": 127 + }, + { + "completion_length": 141.6428680419922, + "epoch": 0.08957312806158152, + "grad_norm": 4.012102127075195, + "kl": 0.04771756753325462, + "learning_rate": 3.9213339304707405e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.4040610194206238, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 128 + }, + { + "completion_length": 206.57144165039062, + "epoch": 0.09027291812456263, + "grad_norm": 2.5189507007598877, + "kl": 0.01796441338956356, + "learning_rate": 3.920108252875653e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.30304574966430664, + "rewards/check_gptzero_func": 0.5, + "step": 129 + }, + { + "completion_length": 165.21429443359375, + "epoch": 0.09097270818754374, + "grad_norm": 2.2849366664886475, + "kl": 0.047430619597435, + "learning_rate": 3.9188732950059626e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.5, + "step": 130 + }, + { + "completion_length": 191.21429443359375, + "epoch": 0.09167249825052484, + "grad_norm": 1.8176424503326416, + "kl": 0.01865430921316147, + "learning_rate": 3.9176290628304724e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 131 + }, + { + "completion_length": 184.21429443359375, + "epoch": 0.09237228831350595, + "grad_norm": 1.6426111459732056, + "kl": 0.030585745349526405, + "learning_rate": 3.91637556236281e-07, + "loss": 0.0, + "reward": 0.8571429252624512, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.8571429252624512, + "step": 132 + }, + { + "completion_length": 175.57144165039062, + "epoch": 0.09307207837648705, + "grad_norm": 1.48448646068573, + "kl": 0.024254297837615013, + "learning_rate": 3.9151127996613994e-07, + "loss": 0.0, + "reward": 0.3571428656578064, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.3571428656578064, + "step": 133 + }, + { + "completion_length": 204.1428680419922, + "epoch": 0.09377186843946816, + "grad_norm": 1.7606929540634155, + "kl": 0.0255893561989069, + "learning_rate": 3.9138407808294287e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 134 + }, + { + "completion_length": 189.57144165039062, + "epoch": 0.09447165850244926, + "grad_norm": 0.007255424279719591, + "kl": 0.020462684333324432, + "learning_rate": 3.912559512014826e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 135 + }, + { + "completion_length": 170.6428680419922, + "epoch": 0.09517144856543037, + "grad_norm": 0.008822077885270119, + "kl": 0.02918427065014839, + "learning_rate": 3.9112689994102233e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 136 + }, + { + "completion_length": 194.7857208251953, + "epoch": 0.09587123862841147, + "grad_norm": 1.4406437873840332, + "kl": 0.0185464546084404, + "learning_rate": 3.9099692492529324e-07, + "loss": 0.0, + "reward": 0.4285714626312256, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.4285714626312256, + "step": 137 + }, + { + "completion_length": 186.92857360839844, + "epoch": 0.09657102869139259, + "grad_norm": 0.8168829083442688, + "kl": 0.022399934008717537, + "learning_rate": 3.908660267824909e-07, + "loss": 0.0, + "reward": 0.3571428656578064, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.3571428656578064, + "step": 138 + }, + { + "completion_length": 197.50001525878906, + "epoch": 0.09727081875437368, + "grad_norm": 3.561955690383911, + "kl": 0.019386975094676018, + "learning_rate": 3.9073420614527284e-07, + "loss": 0.0, + "reward": 0.4285714626312256, + "reward_std": 0.4040610194206238, + "rewards/check_gptzero_func": 0.4285714626312256, + "step": 139 + }, + { + "completion_length": 185.50001525878906, + "epoch": 0.0979706088173548, + "grad_norm": 3.1612038612365723, + "kl": 0.023975731804966927, + "learning_rate": 3.9060146365075506e-07, + "loss": 0.0, + "reward": 0.4285714626312256, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.4285714626312256, + "step": 140 + }, + { + "completion_length": 173.21429443359375, + "epoch": 0.0986703988803359, + "grad_norm": 2.205050468444824, + "kl": 0.028390221297740936, + "learning_rate": 3.9046779994050905e-07, + "loss": 0.0, + "reward": 0.4285714626312256, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.4285714626312256, + "step": 141 + }, + { + "completion_length": 182.2857208251953, + "epoch": 0.099370188943317, + "grad_norm": 1.76483154296875, + "kl": 0.024142036214470863, + "learning_rate": 3.903332156605588e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 142 + }, + { + "completion_length": 202.42857360839844, + "epoch": 0.1000699790062981, + "grad_norm": 1.3527218103408813, + "kl": 0.02578994818031788, + "learning_rate": 3.9019771146137757e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 143 + }, + { + "completion_length": 184.50001525878906, + "epoch": 0.10076976906927922, + "grad_norm": 0.004584628622978926, + "kl": 0.012519586831331253, + "learning_rate": 3.9006128799788475e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 144 + }, + { + "completion_length": 212.92857360839844, + "epoch": 0.10146955913226033, + "grad_norm": 0.5000512599945068, + "kl": 0.00770693551748991, + "learning_rate": 3.899239459294428e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.5, + "step": 145 + }, + { + "completion_length": 200.92857360839844, + "epoch": 0.10216934919524143, + "grad_norm": 0.9776932597160339, + "kl": 0.01647227071225643, + "learning_rate": 3.897856859198539e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 146 + }, + { + "completion_length": 190.85714721679688, + "epoch": 0.10286913925822254, + "grad_norm": 0.9597777128219604, + "kl": 0.02409287542104721, + "learning_rate": 3.896465086373569e-07, + "loss": 0.0, + "reward": 0.2142857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.2142857313156128, + "step": 147 + }, + { + "completion_length": 232.2857208251953, + "epoch": 0.10356892932120364, + "grad_norm": 0.0023802323266863823, + "kl": 0.00452839769423008, + "learning_rate": 3.8950641475462394e-07, + "loss": 0.0, + "reward": 0.2857142984867096, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.2857142984867096, + "step": 148 + }, + { + "completion_length": 150.35714721679688, + "epoch": 0.10426871938418475, + "grad_norm": 0.00779814412817359, + "kl": 0.032739609479904175, + "learning_rate": 3.893654049487573e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 149 + }, + { + "completion_length": 179.2857208251953, + "epoch": 0.10496850944716585, + "grad_norm": 2.1413512229919434, + "kl": 0.016808386892080307, + "learning_rate": 3.8922347990128616e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 150 + }, + { + "completion_length": 173.42857360839844, + "epoch": 0.10566829951014696, + "grad_norm": 1.4173649549484253, + "kl": 0.028368493542075157, + "learning_rate": 3.8908064029816315e-07, + "loss": 0.0, + "reward": 0.2142857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.2142857313156128, + "step": 151 + }, + { + "completion_length": 171.7857208251953, + "epoch": 0.10636808957312806, + "grad_norm": 0.8318215608596802, + "kl": 0.022898903116583824, + "learning_rate": 3.889368868297612e-07, + "loss": 0.0, + "reward": 0.785714328289032, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.785714328289032, + "step": 152 + }, + { + "completion_length": 159.6428680419922, + "epoch": 0.10706787963610917, + "grad_norm": 0.011626004241406918, + "kl": 0.032634247094392776, + "learning_rate": 3.887922201908703e-07, + "loss": 0.0, + "reward": 0.4285714626312256, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.4285714626312256, + "step": 153 + }, + { + "completion_length": 182.85714721679688, + "epoch": 0.10776766969909027, + "grad_norm": 1.0654501914978027, + "kl": 0.016384759917855263, + "learning_rate": 3.886466410806935e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.5, + "step": 154 + }, + { + "completion_length": 175.50001525878906, + "epoch": 0.10846745976207138, + "grad_norm": 2.0227701663970947, + "kl": 0.017170391976833344, + "learning_rate": 3.885001502028445e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.5, + "step": 155 + }, + { + "completion_length": 189.50001525878906, + "epoch": 0.10916724982505248, + "grad_norm": 2.12776255607605, + "kl": 0.01205319631844759, + "learning_rate": 3.8835274826534363e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 156 + }, + { + "completion_length": 202.2857208251953, + "epoch": 0.10986703988803359, + "grad_norm": 1.1410002708435059, + "kl": 0.015749162063002586, + "learning_rate": 3.882044359806143e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 157 + }, + { + "completion_length": 174.07144165039062, + "epoch": 0.11056682995101469, + "grad_norm": 1.730547308921814, + "kl": 0.018030354753136635, + "learning_rate": 3.8805521406548025e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 158 + }, + { + "completion_length": 164.6428680419922, + "epoch": 0.1112666200139958, + "grad_norm": 0.00942758284509182, + "kl": 0.026347342878580093, + "learning_rate": 3.879050832411613e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 159 + }, + { + "completion_length": 178.21429443359375, + "epoch": 0.11196641007697691, + "grad_norm": 0.003551044035702944, + "kl": 0.012404139153659344, + "learning_rate": 3.8775404423327025e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 160 + }, + { + "completion_length": 176.6428680419922, + "epoch": 0.11266620013995801, + "grad_norm": 0.8654048442840576, + "kl": 0.018997181206941605, + "learning_rate": 3.876020977718095e-07, + "loss": 0.0, + "reward": 0.785714328289032, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.785714328289032, + "step": 161 + }, + { + "completion_length": 173.35714721679688, + "epoch": 0.11336599020293912, + "grad_norm": 1.4962221384048462, + "kl": 0.014478221535682678, + "learning_rate": 3.874492445911673e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.30304574966430664, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 162 + }, + { + "completion_length": 168.07144165039062, + "epoch": 0.11406578026592022, + "grad_norm": 0.006159429904073477, + "kl": 0.01945115439593792, + "learning_rate": 3.872954854301142e-07, + "loss": 0.0, + "reward": 0.8571429252624512, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.8571429252624512, + "step": 163 + }, + { + "completion_length": 191.92857360839844, + "epoch": 0.11476557032890133, + "grad_norm": 0.44639861583709717, + "kl": 0.00815486814826727, + "learning_rate": 3.8714082103179955e-07, + "loss": 0.0, + "reward": 0.785714328289032, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.785714328289032, + "step": 164 + }, + { + "completion_length": 156.57144165039062, + "epoch": 0.11546536039188243, + "grad_norm": 1.8552296161651611, + "kl": 0.024526473134756088, + "learning_rate": 3.8698525214374793e-07, + "loss": 0.0, + "reward": 0.9285714626312256, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.9285714626312256, + "step": 165 + }, + { + "completion_length": 167.5, + "epoch": 0.11616515045486354, + "grad_norm": 1.124375581741333, + "kl": 0.02470785565674305, + "learning_rate": 3.868287795178555e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 166 + }, + { + "completion_length": 223.57144165039062, + "epoch": 0.11686494051784464, + "grad_norm": 0.9268680810928345, + "kl": 0.012983305379748344, + "learning_rate": 3.866714039103864e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.5, + "step": 167 + }, + { + "completion_length": 187.50001525878906, + "epoch": 0.11756473058082575, + "grad_norm": 1.2714390754699707, + "kl": 0.026302574202418327, + "learning_rate": 3.865131260819689e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 168 + }, + { + "completion_length": 209.50001525878906, + "epoch": 0.11826452064380685, + "grad_norm": 0.003561601508408785, + "kl": 0.01157893892377615, + "learning_rate": 3.8635394679759215e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 169 + }, + { + "completion_length": 185.92857360839844, + "epoch": 0.11896431070678797, + "grad_norm": 0.9095775485038757, + "kl": 0.02227042429149151, + "learning_rate": 3.8619386682660197e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 170 + }, + { + "completion_length": 160.21429443359375, + "epoch": 0.11966410076976906, + "grad_norm": 2.749595880508423, + "kl": 0.022995274513959885, + "learning_rate": 3.8603288694269744e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.30304574966430664, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 171 + }, + { + "completion_length": 190.42857360839844, + "epoch": 0.12036389083275018, + "grad_norm": 0.002630816772580147, + "kl": 0.010235711000859737, + "learning_rate": 3.858710079239274e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 172 + }, + { + "completion_length": 164.0, + "epoch": 0.12106368089573127, + "grad_norm": 1.5362565517425537, + "kl": 0.025623667985200882, + "learning_rate": 3.85708230552686e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.5, + "step": 173 + }, + { + "completion_length": 206.6428680419922, + "epoch": 0.12176347095871239, + "grad_norm": 1.2382439374923706, + "kl": 0.012417357414960861, + "learning_rate": 3.855445556157093e-07, + "loss": 0.0, + "reward": 0.2857142984867096, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.2857142984867096, + "step": 174 + }, + { + "completion_length": 175.7857208251953, + "epoch": 0.1224632610216935, + "grad_norm": 0.5511037111282349, + "kl": 0.02940467558801174, + "learning_rate": 3.853799839040719e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 175 + }, + { + "completion_length": 183.71429443359375, + "epoch": 0.1231630510846746, + "grad_norm": 0.010460534133017063, + "kl": 0.026606591418385506, + "learning_rate": 3.8521451621318233e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 176 + }, + { + "completion_length": 190.57144165039062, + "epoch": 0.12386284114765571, + "grad_norm": 0.005347545258700848, + "kl": 0.022543279454112053, + "learning_rate": 3.850481533427796e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 177 + }, + { + "completion_length": 214.42857360839844, + "epoch": 0.1245626312106368, + "grad_norm": 1.7615028619766235, + "kl": 0.01479637436568737, + "learning_rate": 3.848808960969295e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 178 + }, + { + "completion_length": 174.6428680419922, + "epoch": 0.12526242127361792, + "grad_norm": 1.4247933626174927, + "kl": 0.024751055985689163, + "learning_rate": 3.8471274528402027e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.30304574966430664, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 179 + }, + { + "completion_length": 206.7857208251953, + "epoch": 0.12596221133659902, + "grad_norm": 1.9691680669784546, + "kl": 0.01430103275924921, + "learning_rate": 3.845437017167592e-07, + "loss": 0.0, + "reward": 0.9285714626312256, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.9285714626312256, + "step": 180 + }, + { + "completion_length": 203.92857360839844, + "epoch": 0.12666200139958012, + "grad_norm": 1.9248921871185303, + "kl": 0.010852116160094738, + "learning_rate": 3.843737662121682e-07, + "loss": 0.0, + "reward": 0.3571428656578064, + "reward_std": 0.30304574966430664, + "rewards/check_gptzero_func": 0.3571428656578064, + "step": 181 + }, + { + "completion_length": 175.92857360839844, + "epoch": 0.12736179146256124, + "grad_norm": 1.3298709392547607, + "kl": 0.02915351092815399, + "learning_rate": 3.8420293959158023e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 182 + }, + { + "completion_length": 176.50001525878906, + "epoch": 0.12806158152554234, + "grad_norm": 0.0045565650798380375, + "kl": 0.015465345233678818, + "learning_rate": 3.840312226806352e-07, + "loss": 0.0, + "reward": 0.4285714626312256, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.4285714626312256, + "step": 183 + }, + { + "completion_length": 172.00001525878906, + "epoch": 0.12876137158852344, + "grad_norm": 0.007714143954217434, + "kl": 0.023726962506771088, + "learning_rate": 3.83858616309276e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 184 + }, + { + "completion_length": 182.2857208251953, + "epoch": 0.12946116165150454, + "grad_norm": 1.1713811159133911, + "kl": 0.027626153081655502, + "learning_rate": 3.8368512131174423e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.5, + "step": 185 + }, + { + "completion_length": 190.2857208251953, + "epoch": 0.13016095171448566, + "grad_norm": 0.005629860796034336, + "kl": 0.020982127636671066, + "learning_rate": 3.835107385265767e-07, + "loss": 0.0, + "reward": 0.8571429252624512, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.8571429252624512, + "step": 186 + }, + { + "completion_length": 186.35714721679688, + "epoch": 0.13086074177746676, + "grad_norm": 0.008394072763621807, + "kl": 0.016412734985351562, + "learning_rate": 3.83335468796601e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 187 + }, + { + "completion_length": 198.35714721679688, + "epoch": 0.13156053184044786, + "grad_norm": 0.6422938108444214, + "kl": 0.01465323381125927, + "learning_rate": 3.831593129689314e-07, + "loss": 0.0, + "reward": 0.3571428656578064, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.3571428656578064, + "step": 188 + }, + { + "completion_length": 199.07144165039062, + "epoch": 0.13226032190342898, + "grad_norm": 0.7808562517166138, + "kl": 0.011609447188675404, + "learning_rate": 3.8298227189496494e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.5, + "step": 189 + }, + { + "completion_length": 197.21429443359375, + "epoch": 0.13296011196641008, + "grad_norm": 0.5544365644454956, + "kl": 0.00919759925454855, + "learning_rate": 3.8280434643037723e-07, + "loss": 0.0, + "reward": 0.9285714626312256, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.9285714626312256, + "step": 190 + }, + { + "completion_length": 213.35714721679688, + "epoch": 0.13365990202939118, + "grad_norm": 1.302751898765564, + "kl": 0.017147762700915337, + "learning_rate": 3.8262553743511827e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 191 + }, + { + "completion_length": 182.85714721679688, + "epoch": 0.13435969209237228, + "grad_norm": 1.3094489574432373, + "kl": 0.018484842032194138, + "learning_rate": 3.824458457734084e-07, + "loss": 0.0, + "reward": 0.785714328289032, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.785714328289032, + "step": 192 + }, + { + "completion_length": 172.57144165039062, + "epoch": 0.1350594821553534, + "grad_norm": 0.009919717907905579, + "kl": 0.031512077897787094, + "learning_rate": 3.8226527231373406e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 193 + }, + { + "completion_length": 168.21429443359375, + "epoch": 0.1357592722183345, + "grad_norm": 0.006084138061851263, + "kl": 0.018543435260653496, + "learning_rate": 3.8208381792884364e-07, + "loss": 0.0, + "reward": 0.4285714626312256, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.4285714626312256, + "step": 194 + }, + { + "completion_length": 152.5, + "epoch": 0.1364590622813156, + "grad_norm": 1.1117351055145264, + "kl": 0.02704041823744774, + "learning_rate": 3.819014834957431e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 195 + }, + { + "completion_length": 192.6428680419922, + "epoch": 0.1371588523442967, + "grad_norm": 0.8523726463317871, + "kl": 0.01723356544971466, + "learning_rate": 3.8171826989569195e-07, + "loss": 0.0, + "reward": 0.3571428656578064, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.3571428656578064, + "step": 196 + }, + { + "completion_length": 190.57144165039062, + "epoch": 0.13785864240727783, + "grad_norm": 0.35283270478248596, + "kl": 0.009949599392712116, + "learning_rate": 3.8153417801419894e-07, + "loss": 0.0, + "reward": 0.785714328289032, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.785714328289032, + "step": 197 + }, + { + "completion_length": 152.1428680419922, + "epoch": 0.13855843247025892, + "grad_norm": 1.2546861171722412, + "kl": 0.0401478074491024, + "learning_rate": 3.813492087410175e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 198 + }, + { + "completion_length": 195.07144165039062, + "epoch": 0.13925822253324002, + "grad_norm": 1.291770339012146, + "kl": 0.014195875264704227, + "learning_rate": 3.811633629701419e-07, + "loss": 0.0, + "reward": 0.785714328289032, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.785714328289032, + "step": 199 + }, + { + "completion_length": 162.5, + "epoch": 0.13995801259622112, + "grad_norm": 2.0312585830688477, + "kl": 0.024253856390714645, + "learning_rate": 3.809766415998027e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 200 + }, + { + "completion_length": 209.35714721679688, + "epoch": 0.14065780265920225, + "grad_norm": 0.9857494235038757, + "kl": 0.012689170427620411, + "learning_rate": 3.807890455324623e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 201 + }, + { + "completion_length": 212.35714721679688, + "epoch": 0.14135759272218335, + "grad_norm": 0.9701467156410217, + "kl": 0.009737645275890827, + "learning_rate": 3.8060057567481074e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 202 + }, + { + "completion_length": 172.1428680419922, + "epoch": 0.14205738278516444, + "grad_norm": 0.007986516691744328, + "kl": 0.025673676282167435, + "learning_rate": 3.804112329377613e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 203 + }, + { + "completion_length": 144.71429443359375, + "epoch": 0.14275717284814557, + "grad_norm": 0.013815109618008137, + "kl": 0.0411306768655777, + "learning_rate": 3.80221018236446e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 204 + }, + { + "completion_length": 209.21429443359375, + "epoch": 0.14345696291112667, + "grad_norm": 0.9316285848617554, + "kl": 0.018284132704138756, + "learning_rate": 3.800299324902112e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.5, + "step": 205 + }, + { + "completion_length": 190.07144165039062, + "epoch": 0.14415675297410777, + "grad_norm": 1.9075722694396973, + "kl": 0.01683659665286541, + "learning_rate": 3.7983797662261327e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 206 + }, + { + "completion_length": 179.6428680419922, + "epoch": 0.14485654303708886, + "grad_norm": 0.005668825004249811, + "kl": 0.016565389931201935, + "learning_rate": 3.7964515156141415e-07, + "loss": 0.0, + "reward": 0.8571429252624512, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.8571429252624512, + "step": 207 + }, + { + "completion_length": 179.42857360839844, + "epoch": 0.14555633310007, + "grad_norm": 0.005222757812589407, + "kl": 0.017875386402010918, + "learning_rate": 3.7945145823857664e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 208 + }, + { + "completion_length": 158.1428680419922, + "epoch": 0.1462561231630511, + "grad_norm": 0.009748779237270355, + "kl": 0.03730550408363342, + "learning_rate": 3.7925689759026003e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 209 + }, + { + "completion_length": 180.92857360839844, + "epoch": 0.1469559132260322, + "grad_norm": 1.2260764837265015, + "kl": 0.017031870782375336, + "learning_rate": 3.790614705568156e-07, + "loss": 0.0, + "reward": 0.3571428656578064, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.3571428656578064, + "step": 210 + }, + { + "completion_length": 192.07144165039062, + "epoch": 0.14765570328901328, + "grad_norm": 0.00429878244176507, + "kl": 0.015482224524021149, + "learning_rate": 3.78865178082782e-07, + "loss": 0.0, + "reward": 0.2857142984867096, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.2857142984867096, + "step": 211 + }, + { + "completion_length": 207.2857208251953, + "epoch": 0.1483554933519944, + "grad_norm": 0.6578147411346436, + "kl": 0.013750012032687664, + "learning_rate": 3.786680211168808e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.5, + "step": 212 + }, + { + "completion_length": 181.6428680419922, + "epoch": 0.1490552834149755, + "grad_norm": 1.0011402368545532, + "kl": 0.019525719806551933, + "learning_rate": 3.784700006120118e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.5, + "step": 213 + }, + { + "completion_length": 212.7857208251953, + "epoch": 0.1497550734779566, + "grad_norm": 2.0065784454345703, + "kl": 0.022760922089219093, + "learning_rate": 3.782711175252486e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.30304574966430664, + "rewards/check_gptzero_func": 0.5, + "step": 214 + }, + { + "completion_length": 169.1428680419922, + "epoch": 0.1504548635409377, + "grad_norm": 1.3377941846847534, + "kl": 0.02202622964978218, + "learning_rate": 3.780713728178335e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 215 + }, + { + "completion_length": 188.6428680419922, + "epoch": 0.15115465360391883, + "grad_norm": 0.2843207120895386, + "kl": 0.020674534142017365, + "learning_rate": 3.7787076745517344e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 216 + }, + { + "completion_length": 151.35714721679688, + "epoch": 0.15185444366689993, + "grad_norm": 1.662339210510254, + "kl": 0.035333938896656036, + "learning_rate": 3.776693024068351e-07, + "loss": 0.0, + "reward": 0.785714328289032, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.785714328289032, + "step": 217 + }, + { + "completion_length": 217.2857208251953, + "epoch": 0.15255423372988103, + "grad_norm": 0.005306210834532976, + "kl": 0.013570250011980534, + "learning_rate": 3.774669786465401e-07, + "loss": 0.0, + "reward": 0.4285714626312256, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.4285714626312256, + "step": 218 + }, + { + "completion_length": 201.50001525878906, + "epoch": 0.15325402379286215, + "grad_norm": 0.004157788120210171, + "kl": 0.018352538347244263, + "learning_rate": 3.772637971521604e-07, + "loss": 0.0, + "reward": 0.4285714626312256, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.4285714626312256, + "step": 219 + }, + { + "completion_length": 187.07144165039062, + "epoch": 0.15395381385584325, + "grad_norm": 0.006472014356404543, + "kl": 0.019100595265626907, + "learning_rate": 3.770597589057136e-07, + "loss": 0.0, + "reward": 0.4285714626312256, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.4285714626312256, + "step": 220 + }, + { + "completion_length": 172.2857208251953, + "epoch": 0.15465360391882435, + "grad_norm": 0.5791244506835938, + "kl": 0.02023283950984478, + "learning_rate": 3.7685486489335803e-07, + "loss": 0.0, + "reward": 0.2142857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.2142857313156128, + "step": 221 + }, + { + "completion_length": 196.50001525878906, + "epoch": 0.15535339398180545, + "grad_norm": 0.002360024955123663, + "kl": 0.011786316521465778, + "learning_rate": 3.766491161053884e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 222 + }, + { + "completion_length": 227.21429443359375, + "epoch": 0.15605318404478657, + "grad_norm": 0.4446110427379608, + "kl": 0.008062051609158516, + "learning_rate": 3.764425135362304e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.5, + "step": 223 + }, + { + "completion_length": 150.85714721679688, + "epoch": 0.15675297410776767, + "grad_norm": 1.8167444467544556, + "kl": 0.03246838599443436, + "learning_rate": 3.7623505818443656e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 224 + }, + { + "completion_length": 216.42857360839844, + "epoch": 0.15745276417074877, + "grad_norm": 0.9097755551338196, + "kl": 0.014242643490433693, + "learning_rate": 3.760267510526806e-07, + "loss": 0.0, + "reward": 0.0714285746216774, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.0714285746216774, + "step": 225 + }, + { + "completion_length": 168.42857360839844, + "epoch": 0.15815255423372987, + "grad_norm": 0.3198361098766327, + "kl": 0.025004452094435692, + "learning_rate": 3.758175931477536e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.5, + "step": 226 + }, + { + "completion_length": 178.07144165039062, + "epoch": 0.158852344296711, + "grad_norm": 0.6277005672454834, + "kl": 0.02422436699271202, + "learning_rate": 3.7560758548055825e-07, + "loss": 0.0, + "reward": 0.8571429252624512, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.8571429252624512, + "step": 227 + }, + { + "completion_length": 169.5, + "epoch": 0.1595521343596921, + "grad_norm": 1.8747457265853882, + "kl": 0.034696850925683975, + "learning_rate": 3.753967290661044e-07, + "loss": 0.0, + "reward": 0.4285714626312256, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.4285714626312256, + "step": 228 + }, + { + "completion_length": 163.42857360839844, + "epoch": 0.1602519244226732, + "grad_norm": 0.008586201816797256, + "kl": 0.03114090859889984, + "learning_rate": 3.7518502492350405e-07, + "loss": 0.0, + "reward": 0.4285714626312256, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.4285714626312256, + "step": 229 + }, + { + "completion_length": 144.1428680419922, + "epoch": 0.1609517144856543, + "grad_norm": 1.1607961654663086, + "kl": 0.028630482032895088, + "learning_rate": 3.749724740759666e-07, + "loss": 0.0, + "reward": 0.4285714626312256, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.4285714626312256, + "step": 230 + }, + { + "completion_length": 194.42857360839844, + "epoch": 0.16165150454863542, + "grad_norm": 0.6749139428138733, + "kl": 0.00983841996639967, + "learning_rate": 3.7475907755079354e-07, + "loss": 0.0, + "reward": 0.2142857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.2142857313156128, + "step": 231 + }, + { + "completion_length": 185.50001525878906, + "epoch": 0.16235129461161651, + "grad_norm": 0.005612351931631565, + "kl": 0.018870672211050987, + "learning_rate": 3.7454483637937376e-07, + "loss": 0.0, + "reward": 0.4285714626312256, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.4285714626312256, + "step": 232 + }, + { + "completion_length": 178.6428680419922, + "epoch": 0.1630510846745976, + "grad_norm": 1.8697179555892944, + "kl": 0.014896044507622719, + "learning_rate": 3.7432975159717854e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.5, + "step": 233 + }, + { + "completion_length": 176.85714721679688, + "epoch": 0.16375087473757874, + "grad_norm": 0.007061387877911329, + "kl": 0.020475070923566818, + "learning_rate": 3.741138242437565e-07, + "loss": 0.0, + "reward": 0.4285714626312256, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.4285714626312256, + "step": 234 + }, + { + "completion_length": 206.1428680419922, + "epoch": 0.16445066480055984, + "grad_norm": 0.003183256834745407, + "kl": 0.012574908323585987, + "learning_rate": 3.738970553627286e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 235 + }, + { + "completion_length": 204.21429443359375, + "epoch": 0.16515045486354094, + "grad_norm": 0.0038842950016260147, + "kl": 0.012972171418368816, + "learning_rate": 3.736794460017829e-07, + "loss": 0.0, + "reward": 0.4285714626312256, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.4285714626312256, + "step": 236 + }, + { + "completion_length": 180.2857208251953, + "epoch": 0.16585024492652203, + "grad_norm": 0.591704249382019, + "kl": 0.019554350525140762, + "learning_rate": 3.7346099721266994e-07, + "loss": 0.0, + "reward": 0.3571428656578064, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.3571428656578064, + "step": 237 + }, + { + "completion_length": 174.21429443359375, + "epoch": 0.16655003498950316, + "grad_norm": 0.0042877099476754665, + "kl": 0.02020900696516037, + "learning_rate": 3.7324171005119714e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 238 + }, + { + "completion_length": 159.2857208251953, + "epoch": 0.16724982505248426, + "grad_norm": 0.6230242252349854, + "kl": 0.024848150089383125, + "learning_rate": 3.7302158557722407e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.5, + "step": 239 + }, + { + "completion_length": 196.1428680419922, + "epoch": 0.16794961511546536, + "grad_norm": 0.44546830654144287, + "kl": 0.011597135104238987, + "learning_rate": 3.7280062485465724e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 240 + }, + { + "completion_length": 178.2857208251953, + "epoch": 0.16864940517844645, + "grad_norm": 0.8619314432144165, + "kl": 0.019945021718740463, + "learning_rate": 3.7257882895144485e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 241 + }, + { + "completion_length": 183.42857360839844, + "epoch": 0.16934919524142758, + "grad_norm": 0.005130656994879246, + "kl": 0.0189889594912529, + "learning_rate": 3.7235619893957167e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 242 + }, + { + "completion_length": 213.71429443359375, + "epoch": 0.17004898530440868, + "grad_norm": 0.4043319523334503, + "kl": 0.010713324882090092, + "learning_rate": 3.72132735895054e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 243 + }, + { + "completion_length": 177.71429443359375, + "epoch": 0.17074877536738978, + "grad_norm": 0.007252123672515154, + "kl": 0.024714874103665352, + "learning_rate": 3.7190844089793423e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 244 + }, + { + "completion_length": 215.07144165039062, + "epoch": 0.17144856543037088, + "grad_norm": 0.47612982988357544, + "kl": 0.009942489676177502, + "learning_rate": 3.716833150322758e-07, + "loss": 0.0, + "reward": 0.3571428656578064, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.3571428656578064, + "step": 245 + }, + { + "completion_length": 216.00001525878906, + "epoch": 0.172148355493352, + "grad_norm": 0.00426530372351408, + "kl": 0.013690494932234287, + "learning_rate": 3.7145735938615795e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 246 + }, + { + "completion_length": 216.1428680419922, + "epoch": 0.1728481455563331, + "grad_norm": 0.49292147159576416, + "kl": 0.01473014522343874, + "learning_rate": 3.712305750516703e-07, + "loss": 0.0, + "reward": 0.3571428656578064, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.3571428656578064, + "step": 247 + }, + { + "completion_length": 188.71429443359375, + "epoch": 0.1735479356193142, + "grad_norm": 0.21176354587078094, + "kl": 0.018445421010255814, + "learning_rate": 3.7100296312490795e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 248 + }, + { + "completion_length": 178.50001525878906, + "epoch": 0.17424772568229532, + "grad_norm": 0.663266658782959, + "kl": 0.013698313385248184, + "learning_rate": 3.707745247059655e-07, + "loss": 0.0, + "reward": 0.3571428656578064, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.3571428656578064, + "step": 249 + }, + { + "completion_length": 196.71429443359375, + "epoch": 0.17494751574527642, + "grad_norm": 1.0289642810821533, + "kl": 0.0190252847969532, + "learning_rate": 3.7054526089893267e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 250 + }, + { + "completion_length": 153.57144165039062, + "epoch": 0.17564730580825752, + "grad_norm": 0.007452791091054678, + "kl": 0.023705553263425827, + "learning_rate": 3.7031517281188786e-07, + "loss": 0.0, + "reward": 0.4285714626312256, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.4285714626312256, + "step": 251 + }, + { + "completion_length": 113.42857360839844, + "epoch": 0.17634709587123862, + "grad_norm": 1.0395256280899048, + "kl": 0.03168682008981705, + "learning_rate": 3.70084261556894e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 252 + }, + { + "completion_length": 214.85714721679688, + "epoch": 0.17704688593421974, + "grad_norm": 0.002410527318716049, + "kl": 0.007506976369768381, + "learning_rate": 3.6985252824999204e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 253 + }, + { + "completion_length": 198.50001525878906, + "epoch": 0.17774667599720084, + "grad_norm": 0.005293078254908323, + "kl": 0.017621850594878197, + "learning_rate": 3.696199740111964e-07, + "loss": 0.0, + "reward": 0.4285714626312256, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.4285714626312256, + "step": 254 + }, + { + "completion_length": 162.1428680419922, + "epoch": 0.17844646606018194, + "grad_norm": 0.5356236696243286, + "kl": 0.03159737586975098, + "learning_rate": 3.693865999644891e-07, + "loss": 0.0, + "reward": 0.9285714626312256, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.9285714626312256, + "step": 255 + }, + { + "completion_length": 189.2857208251953, + "epoch": 0.17914625612316304, + "grad_norm": 0.0037518616300076246, + "kl": 0.017401469871401787, + "learning_rate": 3.6915240723781444e-07, + "loss": 0.0, + "reward": 0.2857142984867096, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.2857142984867096, + "step": 256 + }, + { + "completion_length": 205.42857360839844, + "epoch": 0.17984604618614417, + "grad_norm": 0.0027896377723664045, + "kl": 0.01305939070880413, + "learning_rate": 3.689173969630737e-07, + "loss": 0.0, + "reward": 0.4285714626312256, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.4285714626312256, + "step": 257 + }, + { + "completion_length": 234.00001525878906, + "epoch": 0.18054583624912526, + "grad_norm": 0.860575258731842, + "kl": 0.009122440591454506, + "learning_rate": 3.686815702761193e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.5, + "step": 258 + }, + { + "completion_length": 243.35714721679688, + "epoch": 0.18124562631210636, + "grad_norm": 0.0026629262138158083, + "kl": 0.0046313246712088585, + "learning_rate": 3.684449283167498e-07, + "loss": 0.0, + "reward": 0.8571429252624512, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.8571429252624512, + "step": 259 + }, + { + "completion_length": 176.42857360839844, + "epoch": 0.1819454163750875, + "grad_norm": 0.8813875317573547, + "kl": 0.02494039013981819, + "learning_rate": 3.6820747222870406e-07, + "loss": 0.0, + "reward": 0.785714328289032, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.785714328289032, + "step": 260 + }, + { + "completion_length": 182.21429443359375, + "epoch": 0.18264520643806859, + "grad_norm": 1.1977590322494507, + "kl": 0.022919369861483574, + "learning_rate": 3.6796920315965565e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 261 + }, + { + "completion_length": 200.1428680419922, + "epoch": 0.18334499650104968, + "grad_norm": 0.8918925523757935, + "kl": 0.017570775002241135, + "learning_rate": 3.677301222612077e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 262 + }, + { + "completion_length": 144.6428680419922, + "epoch": 0.18404478656403078, + "grad_norm": 1.3486170768737793, + "kl": 0.041876401752233505, + "learning_rate": 3.674902306888867e-07, + "loss": 0.0, + "reward": 0.785714328289032, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.785714328289032, + "step": 263 + }, + { + "completion_length": 195.2857208251953, + "epoch": 0.1847445766270119, + "grad_norm": 0.4387526214122772, + "kl": 0.014770284295082092, + "learning_rate": 3.672495296021378e-07, + "loss": 0.0, + "reward": 0.785714328289032, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.785714328289032, + "step": 264 + }, + { + "completion_length": 188.6428680419922, + "epoch": 0.185444366689993, + "grad_norm": 0.008016858249902725, + "kl": 0.02409050054848194, + "learning_rate": 3.6700802016431827e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 265 + }, + { + "completion_length": 170.2857208251953, + "epoch": 0.1861441567529741, + "grad_norm": 0.14902782440185547, + "kl": 0.04438198730349541, + "learning_rate": 3.6676570354269234e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.5, + "step": 266 + }, + { + "completion_length": 198.00001525878906, + "epoch": 0.1868439468159552, + "grad_norm": 0.0023566416930407286, + "kl": 0.01239076629281044, + "learning_rate": 3.665225809084259e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 267 + }, + { + "completion_length": 199.21429443359375, + "epoch": 0.18754373687893633, + "grad_norm": 0.7844212651252747, + "kl": 0.016460854560136795, + "learning_rate": 3.6627865343658e-07, + "loss": 0.0, + "reward": 0.3571428656578064, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.3571428656578064, + "step": 268 + }, + { + "completion_length": 215.35714721679688, + "epoch": 0.18824352694191743, + "grad_norm": 0.0015277478378266096, + "kl": 0.011156363412737846, + "learning_rate": 3.660339223061059e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 269 + }, + { + "completion_length": 168.85714721679688, + "epoch": 0.18894331700489853, + "grad_norm": 0.009191691875457764, + "kl": 0.02617422305047512, + "learning_rate": 3.657883886998391e-07, + "loss": 0.0, + "reward": 0.4285714626312256, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.4285714626312256, + "step": 270 + }, + { + "completion_length": 206.50001525878906, + "epoch": 0.18964310706787962, + "grad_norm": 0.002437381772324443, + "kl": 0.011914732865989208, + "learning_rate": 3.6554205380449343e-07, + "loss": 0.0, + "reward": 0.2857142984867096, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.2857142984867096, + "step": 271 + }, + { + "completion_length": 220.21429443359375, + "epoch": 0.19034289713086075, + "grad_norm": 0.008528918959200382, + "kl": 0.02212802693247795, + "learning_rate": 3.652949188106558e-07, + "loss": 0.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_gptzero_func": 1.0, + "step": 272 + }, + { + "completion_length": 198.00001525878906, + "epoch": 0.19104268719384185, + "grad_norm": 2.2235240936279297, + "kl": 0.0296917911618948, + "learning_rate": 3.6504698491277993e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 273 + }, + { + "completion_length": 177.42857360839844, + "epoch": 0.19174247725682295, + "grad_norm": 0.00606621615588665, + "kl": 0.024508515372872353, + "learning_rate": 3.6479825330918095e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 274 + }, + { + "completion_length": 210.92857360839844, + "epoch": 0.19244226731980407, + "grad_norm": 0.4510830342769623, + "kl": 0.007396538741886616, + "learning_rate": 3.645487252020294e-07, + "loss": 0.0, + "reward": 0.785714328289032, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.785714328289032, + "step": 275 + }, + { + "completion_length": 194.42857360839844, + "epoch": 0.19314205738278517, + "grad_norm": 0.5192714929580688, + "kl": 0.013078266754746437, + "learning_rate": 3.6429840179734533e-07, + "loss": 0.0, + "reward": 0.785714328289032, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.785714328289032, + "step": 276 + }, + { + "completion_length": 174.35714721679688, + "epoch": 0.19384184744576627, + "grad_norm": 2.1625211238861084, + "kl": 0.03255700692534447, + "learning_rate": 3.64047284304993e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 277 + }, + { + "completion_length": 219.4285888671875, + "epoch": 0.19454163750874737, + "grad_norm": 0.007325149606913328, + "kl": 0.009761854074895382, + "learning_rate": 3.6379537393867435e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 278 + }, + { + "completion_length": 229.1428680419922, + "epoch": 0.1952414275717285, + "grad_norm": 0.003577533410862088, + "kl": 0.009171624667942524, + "learning_rate": 3.6354267191592355e-07, + "loss": 0.0, + "reward": 0.4285714626312256, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.4285714626312256, + "step": 279 + }, + { + "completion_length": 181.6428680419922, + "epoch": 0.1959412176347096, + "grad_norm": 0.7648665308952332, + "kl": 0.029887784272432327, + "learning_rate": 3.6328917945810094e-07, + "loss": 0.0, + "reward": 0.785714328289032, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.785714328289032, + "step": 280 + }, + { + "completion_length": 185.71429443359375, + "epoch": 0.1966410076976907, + "grad_norm": 0.005976282991468906, + "kl": 0.02214181050658226, + "learning_rate": 3.630348977903873e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 281 + }, + { + "completion_length": 168.71429443359375, + "epoch": 0.1973407977606718, + "grad_norm": 0.8603221774101257, + "kl": 0.022559626027941704, + "learning_rate": 3.6277982814177773e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 282 + }, + { + "completion_length": 196.92857360839844, + "epoch": 0.1980405878236529, + "grad_norm": 1.4601398706436157, + "kl": 0.01703435182571411, + "learning_rate": 3.6252397174507585e-07, + "loss": 0.0, + "reward": 0.4285714626312256, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.4285714626312256, + "step": 283 + }, + { + "completion_length": 175.1428680419922, + "epoch": 0.198740377886634, + "grad_norm": 1.3160336017608643, + "kl": 0.027639390900731087, + "learning_rate": 3.622673298368878e-07, + "loss": 0.0, + "reward": 0.8571429252624512, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.8571429252624512, + "step": 284 + }, + { + "completion_length": 191.07144165039062, + "epoch": 0.1994401679496151, + "grad_norm": 1.9431685209274292, + "kl": 0.02291315235197544, + "learning_rate": 3.620099036576163e-07, + "loss": 0.0, + "reward": 0.4285714626312256, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.4285714626312256, + "step": 285 + }, + { + "completion_length": 177.57144165039062, + "epoch": 0.2001399580125962, + "grad_norm": 2.985306739807129, + "kl": 0.042643751949071884, + "learning_rate": 3.6175169445145434e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 286 + }, + { + "completion_length": 155.71429443359375, + "epoch": 0.20083974807557733, + "grad_norm": 0.002649400383234024, + "kl": 0.01661285012960434, + "learning_rate": 3.6149270346637984e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 287 + }, + { + "completion_length": 204.21429443359375, + "epoch": 0.20153953813855843, + "grad_norm": 1.8782721757888794, + "kl": 0.01844392530620098, + "learning_rate": 3.61232931954149e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 288 + }, + { + "completion_length": 179.00001525878906, + "epoch": 0.20223932820153953, + "grad_norm": 0.007394877262413502, + "kl": 0.02853863313794136, + "learning_rate": 3.6097238117029046e-07, + "loss": 0.0, + "reward": 0.8571429252624512, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.8571429252624512, + "step": 289 + }, + { + "completion_length": 161.35714721679688, + "epoch": 0.20293911826452066, + "grad_norm": 1.560531497001648, + "kl": 0.04655032977461815, + "learning_rate": 3.607110523740992e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.5, + "step": 290 + }, + { + "completion_length": 230.50001525878906, + "epoch": 0.20363890832750176, + "grad_norm": 0.2818041145801544, + "kl": 0.024098943918943405, + "learning_rate": 3.6044894682863075e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 291 + }, + { + "completion_length": 190.35714721679688, + "epoch": 0.20433869839048285, + "grad_norm": 0.423895001411438, + "kl": 0.023380879312753677, + "learning_rate": 3.6018606580069444e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 292 + }, + { + "completion_length": 178.7857208251953, + "epoch": 0.20503848845346395, + "grad_norm": 0.8743607401847839, + "kl": 0.0482012964785099, + "learning_rate": 3.5992241056084806e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 293 + }, + { + "completion_length": 199.42857360839844, + "epoch": 0.20573827851644508, + "grad_norm": 0.8680278062820435, + "kl": 0.017911504954099655, + "learning_rate": 3.5965798238339095e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 294 + }, + { + "completion_length": 193.50001525878906, + "epoch": 0.20643806857942618, + "grad_norm": 1.6768147945404053, + "kl": 0.02769216150045395, + "learning_rate": 3.593927825463585e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 295 + }, + { + "completion_length": 155.21429443359375, + "epoch": 0.20713785864240727, + "grad_norm": 1.4847735166549683, + "kl": 0.02230973169207573, + "learning_rate": 3.5912681233151556e-07, + "loss": 0.0, + "reward": 0.4285714626312256, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.4285714626312256, + "step": 296 + }, + { + "completion_length": 198.07144165039062, + "epoch": 0.20783764870538837, + "grad_norm": 0.9611667990684509, + "kl": 0.022595616057515144, + "learning_rate": 3.588600730243504e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.5, + "step": 297 + }, + { + "completion_length": 188.6428680419922, + "epoch": 0.2085374387683695, + "grad_norm": 1.6368725299835205, + "kl": 0.016185186803340912, + "learning_rate": 3.585925659140685e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.30304574966430664, + "rewards/check_gptzero_func": 0.5, + "step": 298 + }, + { + "completion_length": 164.2857208251953, + "epoch": 0.2092372288313506, + "grad_norm": 1.5320605039596558, + "kl": 0.025026416406035423, + "learning_rate": 3.583242922935861e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 299 + }, + { + "completion_length": 184.35714721679688, + "epoch": 0.2099370188943317, + "grad_norm": 0.00491850171238184, + "kl": 0.017030972987413406, + "learning_rate": 3.5805525345952454e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 300 + }, + { + "completion_length": 169.42857360839844, + "epoch": 0.2106368089573128, + "grad_norm": 0.005794129334390163, + "kl": 0.033467162400484085, + "learning_rate": 3.577854507122032e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 301 + }, + { + "completion_length": 191.2857208251953, + "epoch": 0.21133659902029392, + "grad_norm": 0.3897000849246979, + "kl": 0.02598237618803978, + "learning_rate": 3.575148853556337e-07, + "loss": 0.0, + "reward": 0.785714328289032, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.785714328289032, + "step": 302 + }, + { + "completion_length": 198.50001525878906, + "epoch": 0.21203638908327502, + "grad_norm": 2.381969690322876, + "kl": 0.010137013159692287, + "learning_rate": 3.572435586975137e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 303 + }, + { + "completion_length": 159.85714721679688, + "epoch": 0.21273617914625612, + "grad_norm": 2.3385610580444336, + "kl": 0.04521346464753151, + "learning_rate": 3.569714720492202e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 304 + }, + { + "completion_length": 171.92857360839844, + "epoch": 0.21343596920923724, + "grad_norm": 0.009324567392468452, + "kl": 0.036783114075660706, + "learning_rate": 3.566986267258034e-07, + "loss": 0.0, + "reward": 0.4285714626312256, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.4285714626312256, + "step": 305 + }, + { + "completion_length": 192.2857208251953, + "epoch": 0.21413575927221834, + "grad_norm": 0.26683616638183594, + "kl": 0.0223538838326931, + "learning_rate": 3.5642502404598047e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 306 + }, + { + "completion_length": 187.07144165039062, + "epoch": 0.21483554933519944, + "grad_norm": 1.4338834285736084, + "kl": 0.01840805448591709, + "learning_rate": 3.561506653321288e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.5, + "step": 307 + }, + { + "completion_length": 221.85714721679688, + "epoch": 0.21553533939818054, + "grad_norm": 0.0016010769177228212, + "kl": 0.006783970165997744, + "learning_rate": 3.558755519102801e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 308 + }, + { + "completion_length": 142.1428680419922, + "epoch": 0.21623512946116166, + "grad_norm": 1.469435453414917, + "kl": 0.03523188829421997, + "learning_rate": 3.555996851101135e-07, + "loss": 0.0, + "reward": 0.4285714626312256, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.4285714626312256, + "step": 309 + }, + { + "completion_length": 200.85714721679688, + "epoch": 0.21693491952414276, + "grad_norm": 0.7829067707061768, + "kl": 0.01796119287610054, + "learning_rate": 3.553230662649496e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 310 + }, + { + "completion_length": 195.35714721679688, + "epoch": 0.21763470958712386, + "grad_norm": 0.7208725214004517, + "kl": 0.018696611747145653, + "learning_rate": 3.550456967117436e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.5, + "step": 311 + }, + { + "completion_length": 183.7857208251953, + "epoch": 0.21833449965010496, + "grad_norm": 2.9220948219299316, + "kl": 0.02893522009253502, + "learning_rate": 3.547675777910791e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 312 + }, + { + "completion_length": 196.35714721679688, + "epoch": 0.21903428971308608, + "grad_norm": 0.005603363737463951, + "kl": 0.023820139467716217, + "learning_rate": 3.5448871084716156e-07, + "loss": 0.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_gptzero_func": 1.0, + "step": 313 + }, + { + "completion_length": 202.50001525878906, + "epoch": 0.21973407977606718, + "grad_norm": 1.809675693511963, + "kl": 0.018190210685133934, + "learning_rate": 3.5420909722781173e-07, + "loss": 0.0, + "reward": 0.2142857313156128, + "reward_std": 0.30304574966430664, + "rewards/check_gptzero_func": 0.2142857313156128, + "step": 314 + }, + { + "completion_length": 139.71429443359375, + "epoch": 0.22043386983904828, + "grad_norm": 0.005759156309068203, + "kl": 0.027088141068816185, + "learning_rate": 3.539287382844593e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 315 + }, + { + "completion_length": 162.92857360839844, + "epoch": 0.22113365990202938, + "grad_norm": 0.677117645740509, + "kl": 0.02727174200117588, + "learning_rate": 3.536476353721361e-07, + "loss": 0.0, + "reward": 0.3571428656578064, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.3571428656578064, + "step": 316 + }, + { + "completion_length": 199.07144165039062, + "epoch": 0.2218334499650105, + "grad_norm": 0.4234854578971863, + "kl": 0.009189656935632229, + "learning_rate": 3.533657898494699e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.5, + "step": 317 + }, + { + "completion_length": 169.5, + "epoch": 0.2225332400279916, + "grad_norm": 0.003168420633301139, + "kl": 0.01303552184253931, + "learning_rate": 3.530832030786775e-07, + "loss": 0.0, + "reward": 0.4285714626312256, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.4285714626312256, + "step": 318 + }, + { + "completion_length": 167.42857360839844, + "epoch": 0.2232330300909727, + "grad_norm": 0.005851950030773878, + "kl": 0.025543315336108208, + "learning_rate": 3.527998764255584e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 319 + }, + { + "completion_length": 182.71429443359375, + "epoch": 0.22393282015395383, + "grad_norm": 0.9313241839408875, + "kl": 0.01326631661504507, + "learning_rate": 3.52515811259488e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.5, + "step": 320 + }, + { + "completion_length": 151.2857208251953, + "epoch": 0.22463261021693492, + "grad_norm": 2.09081768989563, + "kl": 0.04782923310995102, + "learning_rate": 3.522310089534113e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 321 + }, + { + "completion_length": 173.71429443359375, + "epoch": 0.22533240027991602, + "grad_norm": 1.844640851020813, + "kl": 0.03384312242269516, + "learning_rate": 3.519454708838358e-07, + "loss": 0.0, + "reward": 0.4285714626312256, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.4285714626312256, + "step": 322 + }, + { + "completion_length": 197.92857360839844, + "epoch": 0.22603219034289712, + "grad_norm": 0.47143810987472534, + "kl": 0.01529007963836193, + "learning_rate": 3.5165919843082527e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.5, + "step": 323 + }, + { + "completion_length": 184.07144165039062, + "epoch": 0.22673198040587825, + "grad_norm": 0.007004480808973312, + "kl": 0.028769435361027718, + "learning_rate": 3.513721929779927e-07, + "loss": 0.0, + "reward": 0.8571429252624512, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.8571429252624512, + "step": 324 + }, + { + "completion_length": 198.92857360839844, + "epoch": 0.22743177046885935, + "grad_norm": 0.8486812710762024, + "kl": 0.0238034650683403, + "learning_rate": 3.5108445591249415e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.5, + "step": 325 + }, + { + "completion_length": 198.92857360839844, + "epoch": 0.22813156053184044, + "grad_norm": 0.3655204176902771, + "kl": 0.009955493733286858, + "learning_rate": 3.5079598862502127e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 326 + }, + { + "completion_length": 158.42857360839844, + "epoch": 0.22883135059482154, + "grad_norm": 0.0073427981697022915, + "kl": 0.03223417326807976, + "learning_rate": 3.5050679250979545e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 327 + }, + { + "completion_length": 192.85714721679688, + "epoch": 0.22953114065780267, + "grad_norm": 0.7927706837654114, + "kl": 0.01642785035073757, + "learning_rate": 3.502168689645604e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.5, + "step": 328 + }, + { + "completion_length": 188.2857208251953, + "epoch": 0.23023093072078377, + "grad_norm": 0.7956569790840149, + "kl": 0.020435122773051262, + "learning_rate": 3.499262193905757e-07, + "loss": 0.0, + "reward": 0.2142857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.2142857313156128, + "step": 329 + }, + { + "completion_length": 195.35714721679688, + "epoch": 0.23093072078376486, + "grad_norm": 0.5955920219421387, + "kl": 0.05103569105267525, + "learning_rate": 3.4963484519261004e-07, + "loss": 0.0001, + "reward": 0.3571428656578064, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.3571428656578064, + "step": 330 + }, + { + "completion_length": 182.35714721679688, + "epoch": 0.23163051084674596, + "grad_norm": 2.227114677429199, + "kl": 0.037436001002788544, + "learning_rate": 3.493427477789343e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 331 + }, + { + "completion_length": 169.85714721679688, + "epoch": 0.2323303009097271, + "grad_norm": 0.7168689370155334, + "kl": 0.035353146493434906, + "learning_rate": 3.490499285613148e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.5, + "step": 332 + }, + { + "completion_length": 190.92857360839844, + "epoch": 0.2330300909727082, + "grad_norm": 1.7933284044265747, + "kl": 0.022699838504195213, + "learning_rate": 3.487563889550065e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.30304574966430664, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 333 + }, + { + "completion_length": 173.85714721679688, + "epoch": 0.23372988103568929, + "grad_norm": 0.0028698286041617393, + "kl": 0.015531730838119984, + "learning_rate": 3.484621303787462e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 334 + }, + { + "completion_length": 180.50001525878906, + "epoch": 0.2344296710986704, + "grad_norm": 0.010946807451546192, + "kl": 0.026166755706071854, + "learning_rate": 3.481671542547456e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 335 + }, + { + "completion_length": 224.6428680419922, + "epoch": 0.2351294611616515, + "grad_norm": 0.9801257848739624, + "kl": 0.009198243729770184, + "learning_rate": 3.478714620086844e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 336 + }, + { + "completion_length": 189.85714721679688, + "epoch": 0.2358292512246326, + "grad_norm": 0.7460960149765015, + "kl": 0.018040716648101807, + "learning_rate": 3.475750550697034e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 337 + }, + { + "completion_length": 178.71429443359375, + "epoch": 0.2365290412876137, + "grad_norm": 0.7375363707542419, + "kl": 0.018047412857413292, + "learning_rate": 3.47277934870398e-07, + "loss": 0.0, + "reward": 0.785714328289032, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.785714328289032, + "step": 338 + }, + { + "completion_length": 194.71429443359375, + "epoch": 0.23722883135059483, + "grad_norm": 0.0035240540746599436, + "kl": 0.013578321784734726, + "learning_rate": 3.4698010284681044e-07, + "loss": 0.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_gptzero_func": 1.0, + "step": 339 + }, + { + "completion_length": 186.00001525878906, + "epoch": 0.23792862141357593, + "grad_norm": 0.010145165026187897, + "kl": 0.026406429708003998, + "learning_rate": 3.466815604384238e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 340 + }, + { + "completion_length": 176.07144165039062, + "epoch": 0.23862841147655703, + "grad_norm": 2.3864660263061523, + "kl": 0.02834818884730339, + "learning_rate": 3.463823090881543e-07, + "loss": 0.0, + "reward": 0.8571429252624512, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.8571429252624512, + "step": 341 + }, + { + "completion_length": 186.85714721679688, + "epoch": 0.23932820153953813, + "grad_norm": 0.3408975601196289, + "kl": 0.018927130848169327, + "learning_rate": 3.4608235024234474e-07, + "loss": 0.0, + "reward": 0.9285714626312256, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.9285714626312256, + "step": 342 + }, + { + "completion_length": 218.6428680419922, + "epoch": 0.24002799160251925, + "grad_norm": 2.155103921890259, + "kl": 0.01563032530248165, + "learning_rate": 3.457816853507574e-07, + "loss": 0.0, + "reward": 0.785714328289032, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.785714328289032, + "step": 343 + }, + { + "completion_length": 209.35714721679688, + "epoch": 0.24072778166550035, + "grad_norm": 0.003645299468189478, + "kl": 0.013128400780260563, + "learning_rate": 3.454803158665669e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 344 + }, + { + "completion_length": 199.07144165039062, + "epoch": 0.24142757172848145, + "grad_norm": 0.003979403525590897, + "kl": 0.020676320418715477, + "learning_rate": 3.4517824324635345e-07, + "loss": 0.0, + "reward": 0.4285714626312256, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.4285714626312256, + "step": 345 + }, + { + "completion_length": 201.21429443359375, + "epoch": 0.24212736179146255, + "grad_norm": 0.9270420670509338, + "kl": 0.01023801788687706, + "learning_rate": 3.448754689500956e-07, + "loss": 0.0, + "reward": 0.785714328289032, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.785714328289032, + "step": 346 + }, + { + "completion_length": 173.42857360839844, + "epoch": 0.24282715185444367, + "grad_norm": 0.9614090323448181, + "kl": 0.022334929555654526, + "learning_rate": 3.445719944411633e-07, + "loss": 0.0, + "reward": 0.785714328289032, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.785714328289032, + "step": 347 + }, + { + "completion_length": 193.57144165039062, + "epoch": 0.24352694191742477, + "grad_norm": 0.6329968571662903, + "kl": 0.03677517920732498, + "learning_rate": 3.4426782118631065e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.5, + "step": 348 + }, + { + "completion_length": 188.92857360839844, + "epoch": 0.24422673198040587, + "grad_norm": 0.0044876690953969955, + "kl": 0.02110815793275833, + "learning_rate": 3.4396295065566896e-07, + "loss": 0.0, + "reward": 0.1428571492433548, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.1428571492433548, + "step": 349 + }, + { + "completion_length": 184.92857360839844, + "epoch": 0.244926522043387, + "grad_norm": 2.3479273319244385, + "kl": 0.02571859024465084, + "learning_rate": 3.436573843227397e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.4040610194206238, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 350 + }, + { + "completion_length": 172.42857360839844, + "epoch": 0.2456263121063681, + "grad_norm": 0.15331880748271942, + "kl": 0.018859129399061203, + "learning_rate": 3.4335112366438724e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.5, + "step": 351 + }, + { + "completion_length": 178.42857360839844, + "epoch": 0.2463261021693492, + "grad_norm": 0.8721615672111511, + "kl": 0.027232788503170013, + "learning_rate": 3.4304417016083183e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 352 + }, + { + "completion_length": 177.57144165039062, + "epoch": 0.2470258922323303, + "grad_norm": 0.00629481952637434, + "kl": 0.023867381736636162, + "learning_rate": 3.427365252956423e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 353 + }, + { + "completion_length": 184.07144165039062, + "epoch": 0.24772568229531142, + "grad_norm": 0.005834286566823721, + "kl": 0.029148750007152557, + "learning_rate": 3.424281905557289e-07, + "loss": 0.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_gptzero_func": 1.0, + "step": 354 + }, + { + "completion_length": 197.00001525878906, + "epoch": 0.24842547235829252, + "grad_norm": 0.9186586141586304, + "kl": 0.02936585247516632, + "learning_rate": 3.4211916743133643e-07, + "loss": 0.0, + "reward": 0.3571428656578064, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.3571428656578064, + "step": 355 + }, + { + "completion_length": 181.6428680419922, + "epoch": 0.2491252624212736, + "grad_norm": 1.5280996561050415, + "kl": 0.044441286474466324, + "learning_rate": 3.4180945741603654e-07, + "loss": 0.0, + "reward": 0.785714328289032, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.785714328289032, + "step": 356 + }, + { + "completion_length": 185.57144165039062, + "epoch": 0.2498250524842547, + "grad_norm": 2.672974109649658, + "kl": 0.0459924079477787, + "learning_rate": 3.4149906200672086e-07, + "loss": 0.0, + "reward": 0.3571428656578064, + "reward_std": 0.30304574966430664, + "rewards/check_gptzero_func": 0.3571428656578064, + "step": 357 + }, + { + "completion_length": 165.71429443359375, + "epoch": 0.25052484254723584, + "grad_norm": 0.007046896498650312, + "kl": 0.03102685511112213, + "learning_rate": 3.411879827035937e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 358 + }, + { + "completion_length": 157.92857360839844, + "epoch": 0.25122463261021694, + "grad_norm": 0.6856256723403931, + "kl": 0.04061925411224365, + "learning_rate": 3.4087622101016484e-07, + "loss": 0.0, + "reward": 0.9285714626312256, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.9285714626312256, + "step": 359 + }, + { + "completion_length": 172.2857208251953, + "epoch": 0.25192442267319803, + "grad_norm": 0.003146551316604018, + "kl": 0.01322211790829897, + "learning_rate": 3.40563778433242e-07, + "loss": 0.0, + "reward": 0.8571429252624512, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.8571429252624512, + "step": 360 + }, + { + "completion_length": 181.2857208251953, + "epoch": 0.25262421273617913, + "grad_norm": 0.7451429963111877, + "kl": 0.034680504351854324, + "learning_rate": 3.402506564829239e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 361 + }, + { + "completion_length": 174.85714721679688, + "epoch": 0.25332400279916023, + "grad_norm": 0.007092596963047981, + "kl": 0.03785092756152153, + "learning_rate": 3.399368566725927e-07, + "loss": 0.0, + "reward": 0.4285714626312256, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.4285714626312256, + "step": 362 + }, + { + "completion_length": 223.1428680419922, + "epoch": 0.2540237928621414, + "grad_norm": 0.7141907215118408, + "kl": 0.010259164497256279, + "learning_rate": 3.396223805189068e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 363 + }, + { + "completion_length": 200.85714721679688, + "epoch": 0.2547235829251225, + "grad_norm": 0.6981567144393921, + "kl": 0.02434498630464077, + "learning_rate": 3.393072295417937e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.5, + "step": 364 + }, + { + "completion_length": 182.35714721679688, + "epoch": 0.2554233729881036, + "grad_norm": 0.6945000290870667, + "kl": 0.03650469705462456, + "learning_rate": 3.389914052644423e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 365 + }, + { + "completion_length": 182.6428680419922, + "epoch": 0.2561231630510847, + "grad_norm": 2.042914867401123, + "kl": 0.02148597687482834, + "learning_rate": 3.3867490921329557e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.5, + "step": 366 + }, + { + "completion_length": 186.50001525878906, + "epoch": 0.2568229531140658, + "grad_norm": 2.7780933380126953, + "kl": 0.04937548562884331, + "learning_rate": 3.3835774291804357e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 367 + }, + { + "completion_length": 165.07144165039062, + "epoch": 0.2575227431770469, + "grad_norm": 0.41162511706352234, + "kl": 0.01816270314157009, + "learning_rate": 3.3803990791161567e-07, + "loss": 0.0, + "reward": 0.785714328289032, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.785714328289032, + "step": 368 + }, + { + "completion_length": 200.1428680419922, + "epoch": 0.258222533240028, + "grad_norm": 0.9085342288017273, + "kl": 0.017531177029013634, + "learning_rate": 3.3772140573017316e-07, + "loss": 0.0, + "reward": 0.4285714626312256, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.4285714626312256, + "step": 369 + }, + { + "completion_length": 210.7857208251953, + "epoch": 0.25892232330300907, + "grad_norm": 1.0211751461029053, + "kl": 0.022307362407445908, + "learning_rate": 3.3740223791310205e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 370 + }, + { + "completion_length": 220.50001525878906, + "epoch": 0.2596221133659902, + "grad_norm": 0.00706710759550333, + "kl": 0.022423824295401573, + "learning_rate": 3.370824060030054e-07, + "loss": 0.0, + "reward": 0.4285714626312256, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.4285714626312256, + "step": 371 + }, + { + "completion_length": 227.35714721679688, + "epoch": 0.2603219034289713, + "grad_norm": 0.003542853519320488, + "kl": 0.017943989485502243, + "learning_rate": 3.3676191154569627e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 372 + }, + { + "completion_length": 160.92857360839844, + "epoch": 0.2610216934919524, + "grad_norm": 0.6578136086463928, + "kl": 0.04885334149003029, + "learning_rate": 3.364407560901894e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 373 + }, + { + "completion_length": 177.2857208251953, + "epoch": 0.2617214835549335, + "grad_norm": 1.1439343690872192, + "kl": 0.025096897035837173, + "learning_rate": 3.361189411886947e-07, + "loss": 0.0, + "reward": 0.9285714626312256, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.9285714626312256, + "step": 374 + }, + { + "completion_length": 197.2857208251953, + "epoch": 0.2624212736179146, + "grad_norm": 1.142340064048767, + "kl": 0.02332978881895542, + "learning_rate": 3.3579646839660923e-07, + "loss": 0.0, + "reward": 0.4285714626312256, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.4285714626312256, + "step": 375 + }, + { + "completion_length": 165.57144165039062, + "epoch": 0.2631210636808957, + "grad_norm": 0.010401121340692043, + "kl": 0.04579593241214752, + "learning_rate": 3.3547333927250973e-07, + "loss": 0.0, + "reward": 0.8571429252624512, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.8571429252624512, + "step": 376 + }, + { + "completion_length": 193.92857360839844, + "epoch": 0.2638208537438768, + "grad_norm": 0.7678545117378235, + "kl": 0.01843220740556717, + "learning_rate": 3.3514955537814506e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.5, + "step": 377 + }, + { + "completion_length": 168.35714721679688, + "epoch": 0.26452064380685797, + "grad_norm": 1.4129458665847778, + "kl": 0.031178675591945648, + "learning_rate": 3.3482511827842885e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 378 + }, + { + "completion_length": 189.85714721679688, + "epoch": 0.26522043386983907, + "grad_norm": 0.4906970262527466, + "kl": 0.011025870218873024, + "learning_rate": 3.3450002954143165e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 379 + }, + { + "completion_length": 201.71429443359375, + "epoch": 0.26592022393282017, + "grad_norm": 0.9279190301895142, + "kl": 0.018940366804599762, + "learning_rate": 3.341742907383737e-07, + "loss": 0.0, + "reward": 0.785714328289032, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.785714328289032, + "step": 380 + }, + { + "completion_length": 188.92857360839844, + "epoch": 0.26662001399580126, + "grad_norm": 0.5956905484199524, + "kl": 0.029153874143958092, + "learning_rate": 3.33847903443617e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 381 + }, + { + "completion_length": 173.2857208251953, + "epoch": 0.26731980405878236, + "grad_norm": 1.0010219812393188, + "kl": 0.029253369197249413, + "learning_rate": 3.335208692346579e-07, + "loss": 0.0, + "reward": 0.785714328289032, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.785714328289032, + "step": 382 + }, + { + "completion_length": 178.07144165039062, + "epoch": 0.26801959412176346, + "grad_norm": 2.680180072784424, + "kl": 0.028426989912986755, + "learning_rate": 3.331931896921193e-07, + "loss": 0.0, + "reward": 0.4285714626312256, + "reward_std": 0.4040610194206238, + "rewards/check_gptzero_func": 0.4285714626312256, + "step": 383 + }, + { + "completion_length": 170.07144165039062, + "epoch": 0.26871938418474456, + "grad_norm": 0.005205106921494007, + "kl": 0.023029359057545662, + "learning_rate": 3.3286486639974333e-07, + "loss": 0.0, + "reward": 0.4285714626312256, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.4285714626312256, + "step": 384 + }, + { + "completion_length": 175.21429443359375, + "epoch": 0.26941917424772566, + "grad_norm": 1.9119175672531128, + "kl": 0.033027730882167816, + "learning_rate": 3.325359009443834e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.5, + "step": 385 + }, + { + "completion_length": 175.42857360839844, + "epoch": 0.2701189643107068, + "grad_norm": 0.004134666174650192, + "kl": 0.021845312789082527, + "learning_rate": 3.3220629491599645e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 386 + }, + { + "completion_length": 181.1428680419922, + "epoch": 0.2708187543736879, + "grad_norm": 1.2391949892044067, + "kl": 0.03317902237176895, + "learning_rate": 3.318760499076358e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 387 + }, + { + "completion_length": 208.6428680419922, + "epoch": 0.271518544436669, + "grad_norm": 0.006398039869964123, + "kl": 0.021072857081890106, + "learning_rate": 3.3154516751544286e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 388 + }, + { + "completion_length": 229.85714721679688, + "epoch": 0.2722183344996501, + "grad_norm": 1.2711175680160522, + "kl": 0.012140165083110332, + "learning_rate": 3.312136493386396e-07, + "loss": 0.0, + "reward": 0.4285714626312256, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.4285714626312256, + "step": 389 + }, + { + "completion_length": 200.6428680419922, + "epoch": 0.2729181245626312, + "grad_norm": 0.004189657047390938, + "kl": 0.016920868307352066, + "learning_rate": 3.308814969795211e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 390 + }, + { + "completion_length": 175.85714721679688, + "epoch": 0.2736179146256123, + "grad_norm": 1.2724921703338623, + "kl": 0.02616955153644085, + "learning_rate": 3.305487120434472e-07, + "loss": 0.0, + "reward": 0.3571428656578064, + "reward_std": 0.30304574966430664, + "rewards/check_gptzero_func": 0.3571428656578064, + "step": 391 + }, + { + "completion_length": 180.85714721679688, + "epoch": 0.2743177046885934, + "grad_norm": 1.3679845333099365, + "kl": 0.025574391707777977, + "learning_rate": 3.3021529613883557e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.5, + "step": 392 + }, + { + "completion_length": 179.35714721679688, + "epoch": 0.27501749475157455, + "grad_norm": 1.2445677518844604, + "kl": 0.022411201149225235, + "learning_rate": 3.2988125087715304e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 393 + }, + { + "completion_length": 181.71429443359375, + "epoch": 0.27571728481455565, + "grad_norm": 1.1583846807479858, + "kl": 0.025185860693454742, + "learning_rate": 3.2954657787290854e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.5, + "step": 394 + }, + { + "completion_length": 187.35714721679688, + "epoch": 0.27641707487753675, + "grad_norm": 1.3583505153656006, + "kl": 0.02562299557030201, + "learning_rate": 3.2921127874364495e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 395 + }, + { + "completion_length": 176.07144165039062, + "epoch": 0.27711686494051785, + "grad_norm": 0.5323813557624817, + "kl": 0.04567427560687065, + "learning_rate": 3.2887535510993133e-07, + "loss": 0.0, + "reward": 0.785714328289032, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.785714328289032, + "step": 396 + }, + { + "completion_length": 179.21429443359375, + "epoch": 0.27781665500349895, + "grad_norm": 0.43912947177886963, + "kl": 0.02632255293428898, + "learning_rate": 3.28538808595355e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 397 + }, + { + "completion_length": 197.42857360839844, + "epoch": 0.27851644506648005, + "grad_norm": 0.004964158404618502, + "kl": 0.025014452636241913, + "learning_rate": 3.2820164082651395e-07, + "loss": 0.0, + "reward": 0.8571429252624512, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.8571429252624512, + "step": 398 + }, + { + "completion_length": 180.21429443359375, + "epoch": 0.27921623512946114, + "grad_norm": 1.811867117881775, + "kl": 0.017614759504795074, + "learning_rate": 3.2786385343300867e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.5050762891769409, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 399 + }, + { + "completion_length": 190.50001525878906, + "epoch": 0.27991602519244224, + "grad_norm": 2.259676456451416, + "kl": 0.02197328954935074, + "learning_rate": 3.275254480474345e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 400 + }, + { + "completion_length": 180.35714721679688, + "epoch": 0.2806158152554234, + "grad_norm": 0.006867602933198214, + "kl": 0.029920106753706932, + "learning_rate": 3.2718642630537365e-07, + "loss": 0.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_gptzero_func": 1.0, + "step": 401 + }, + { + "completion_length": 160.57144165039062, + "epoch": 0.2813156053184045, + "grad_norm": 0.005083282012492418, + "kl": 0.02295750565826893, + "learning_rate": 3.268467898453874e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 402 + }, + { + "completion_length": 183.42857360839844, + "epoch": 0.2820153953813856, + "grad_norm": 0.005555762443691492, + "kl": 0.02372424677014351, + "learning_rate": 3.265065403090079e-07, + "loss": 0.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_gptzero_func": 1.0, + "step": 403 + }, + { + "completion_length": 196.07144165039062, + "epoch": 0.2827151854443667, + "grad_norm": 0.9104824662208557, + "kl": 0.021749457344412804, + "learning_rate": 3.2616567934073055e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 404 + }, + { + "completion_length": 155.07144165039062, + "epoch": 0.2834149755073478, + "grad_norm": 1.1475639343261719, + "kl": 0.015838012099266052, + "learning_rate": 3.2582420858800594e-07, + "loss": 0.0, + "reward": 0.4285714626312256, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.4285714626312256, + "step": 405 + }, + { + "completion_length": 231.35714721679688, + "epoch": 0.2841147655703289, + "grad_norm": 0.6490912437438965, + "kl": 0.012299914844334126, + "learning_rate": 3.2548212970123176e-07, + "loss": 0.0, + "reward": 0.785714328289032, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.785714328289032, + "step": 406 + }, + { + "completion_length": 191.35714721679688, + "epoch": 0.28481455563331, + "grad_norm": 0.005345775280147791, + "kl": 0.020644469186663628, + "learning_rate": 3.2513944433374496e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 407 + }, + { + "completion_length": 188.85714721679688, + "epoch": 0.28551434569629114, + "grad_norm": 0.7396431565284729, + "kl": 0.024756096303462982, + "learning_rate": 3.2479615414181393e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 408 + }, + { + "completion_length": 159.35714721679688, + "epoch": 0.28621413575927224, + "grad_norm": 1.3673442602157593, + "kl": 0.03201150521636009, + "learning_rate": 3.2445226078463003e-07, + "loss": 0.0, + "reward": 0.3571428656578064, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.3571428656578064, + "step": 409 + }, + { + "completion_length": 175.6428680419922, + "epoch": 0.28691392582225334, + "grad_norm": 0.006910817231982946, + "kl": 0.03318994492292404, + "learning_rate": 3.2410776592429993e-07, + "loss": 0.0, + "reward": 0.2857142984867096, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.2857142984867096, + "step": 410 + }, + { + "completion_length": 200.57144165039062, + "epoch": 0.28761371588523443, + "grad_norm": 0.8189232349395752, + "kl": 0.014908754266798496, + "learning_rate": 3.2376267122583765e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.5, + "step": 411 + }, + { + "completion_length": 199.7857208251953, + "epoch": 0.28831350594821553, + "grad_norm": 1.0011372566223145, + "kl": 0.026383478194475174, + "learning_rate": 3.234169783571561e-07, + "loss": 0.0, + "reward": 0.785714328289032, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.785714328289032, + "step": 412 + }, + { + "completion_length": 197.71429443359375, + "epoch": 0.28901329601119663, + "grad_norm": 0.6323328614234924, + "kl": 0.029050709679722786, + "learning_rate": 3.2307068898905946e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 413 + }, + { + "completion_length": 196.2857208251953, + "epoch": 0.28971308607417773, + "grad_norm": 0.7771051526069641, + "kl": 0.020319391041994095, + "learning_rate": 3.227238047952348e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.5, + "step": 414 + }, + { + "completion_length": 192.07144165039062, + "epoch": 0.2904128761371588, + "grad_norm": 0.7694050073623657, + "kl": 0.013213138096034527, + "learning_rate": 3.2237632745224415e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 415 + }, + { + "completion_length": 192.71429443359375, + "epoch": 0.29111266620014, + "grad_norm": 0.003982989117503166, + "kl": 0.01707112416625023, + "learning_rate": 3.2202825863951624e-07, + "loss": 0.0, + "reward": 0.4285714626312256, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.4285714626312256, + "step": 416 + }, + { + "completion_length": 203.92857360839844, + "epoch": 0.2918124562631211, + "grad_norm": 0.8812685012817383, + "kl": 0.019418731331825256, + "learning_rate": 3.2167960003933884e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 417 + }, + { + "completion_length": 148.92857360839844, + "epoch": 0.2925122463261022, + "grad_norm": 0.9575265049934387, + "kl": 0.041733238846063614, + "learning_rate": 3.2133035333684985e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 418 + }, + { + "completion_length": 191.07144165039062, + "epoch": 0.2932120363890833, + "grad_norm": 0.6743429899215698, + "kl": 0.02127690054476261, + "learning_rate": 3.2098052022002976e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 419 + }, + { + "completion_length": 173.1428680419922, + "epoch": 0.2939118264520644, + "grad_norm": 1.4550578594207764, + "kl": 0.039650507271289825, + "learning_rate": 3.206301023796934e-07, + "loss": 0.0, + "reward": 0.3571428656578064, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.3571428656578064, + "step": 420 + }, + { + "completion_length": 195.71429443359375, + "epoch": 0.29461161651504547, + "grad_norm": 1.651827096939087, + "kl": 0.030250184237957, + "learning_rate": 3.2027910150948166e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.30304574966430664, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 421 + }, + { + "completion_length": 198.85714721679688, + "epoch": 0.29531140657802657, + "grad_norm": 0.38959023356437683, + "kl": 0.014403433538973331, + "learning_rate": 3.1992751930585325e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.5, + "step": 422 + }, + { + "completion_length": 159.6428680419922, + "epoch": 0.2960111966410077, + "grad_norm": 1.345651626586914, + "kl": 0.03709445148706436, + "learning_rate": 3.195753574680766e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 423 + }, + { + "completion_length": 212.71429443359375, + "epoch": 0.2967109867039888, + "grad_norm": 1.3157126903533936, + "kl": 0.025067942216992378, + "learning_rate": 3.192226176982218e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.30304574966430664, + "rewards/check_gptzero_func": 0.5, + "step": 424 + }, + { + "completion_length": 216.71429443359375, + "epoch": 0.2974107767669699, + "grad_norm": 0.7132723927497864, + "kl": 0.014864970929920673, + "learning_rate": 3.188693017011519e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 425 + }, + { + "completion_length": 170.6428680419922, + "epoch": 0.298110566829951, + "grad_norm": 0.006631612312048674, + "kl": 0.03703395277261734, + "learning_rate": 3.1851541118451517e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 426 + }, + { + "completion_length": 178.42857360839844, + "epoch": 0.2988103568929321, + "grad_norm": 1.1006743907928467, + "kl": 0.05553247407078743, + "learning_rate": 3.181609478587367e-07, + "loss": 0.0001, + "reward": 0.785714328289032, + "reward_std": 0.30304574966430664, + "rewards/check_gptzero_func": 0.785714328289032, + "step": 427 + }, + { + "completion_length": 183.00001525878906, + "epoch": 0.2995101469559132, + "grad_norm": 0.006483997218310833, + "kl": 0.025525551289319992, + "learning_rate": 3.1780591343700993e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 428 + }, + { + "completion_length": 188.1428680419922, + "epoch": 0.3002099370188943, + "grad_norm": 3.3809287548065186, + "kl": 0.0215449295938015, + "learning_rate": 3.1745030963528867e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 429 + }, + { + "completion_length": 217.71429443359375, + "epoch": 0.3009097270818754, + "grad_norm": 1.399482250213623, + "kl": 0.015575726516544819, + "learning_rate": 3.1709413817227847e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 430 + }, + { + "completion_length": 185.42857360839844, + "epoch": 0.30160951714485656, + "grad_norm": 0.8324539661407471, + "kl": 0.01825200393795967, + "learning_rate": 3.1673740076942875e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.5, + "step": 431 + }, + { + "completion_length": 189.35714721679688, + "epoch": 0.30230930720783766, + "grad_norm": 0.004090950824320316, + "kl": 0.01635858602821827, + "learning_rate": 3.1638009915092393e-07, + "loss": 0.0, + "reward": 0.8571429252624512, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.8571429252624512, + "step": 432 + }, + { + "completion_length": 205.7857208251953, + "epoch": 0.30300909727081876, + "grad_norm": 1.0944817066192627, + "kl": 0.026154454797506332, + "learning_rate": 3.160222350436757e-07, + "loss": 0.0, + "reward": 0.4285714626312256, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.4285714626312256, + "step": 433 + }, + { + "completion_length": 191.85714721679688, + "epoch": 0.30370888733379986, + "grad_norm": 0.6462128162384033, + "kl": 0.018102161586284637, + "learning_rate": 3.156638101773143e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 434 + }, + { + "completion_length": 215.35714721679688, + "epoch": 0.30440867739678096, + "grad_norm": 0.004543273244053125, + "kl": 0.02344910241663456, + "learning_rate": 3.1530482628418e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 435 + }, + { + "completion_length": 211.50001525878906, + "epoch": 0.30510846745976206, + "grad_norm": 0.007342597935348749, + "kl": 0.023821823298931122, + "learning_rate": 3.149452850993152e-07, + "loss": 0.0, + "reward": 0.8571429252624512, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.8571429252624512, + "step": 436 + }, + { + "completion_length": 195.50001525878906, + "epoch": 0.30580825752274315, + "grad_norm": 1.1299775838851929, + "kl": 0.02076021395623684, + "learning_rate": 3.145851883604558e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.30304574966430664, + "rewards/check_gptzero_func": 0.5, + "step": 437 + }, + { + "completion_length": 186.2857208251953, + "epoch": 0.3065080475857243, + "grad_norm": 0.8335484862327576, + "kl": 0.031062429770827293, + "learning_rate": 3.1422453780802264e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.5, + "step": 438 + }, + { + "completion_length": 195.50001525878906, + "epoch": 0.3072078376487054, + "grad_norm": 0.007976455613970757, + "kl": 0.02391653135418892, + "learning_rate": 3.1386333518511345e-07, + "loss": 0.0, + "reward": 0.8571429252624512, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.8571429252624512, + "step": 439 + }, + { + "completion_length": 200.21429443359375, + "epoch": 0.3079076277116865, + "grad_norm": 0.7803175449371338, + "kl": 0.020620595663785934, + "learning_rate": 3.1350158223749413e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 440 + }, + { + "completion_length": 195.6428680419922, + "epoch": 0.3086074177746676, + "grad_norm": 0.004706548992544413, + "kl": 0.020712165161967278, + "learning_rate": 3.1313928071359036e-07, + "loss": 0.0, + "reward": 0.8571429252624512, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.8571429252624512, + "step": 441 + }, + { + "completion_length": 196.85714721679688, + "epoch": 0.3093072078376487, + "grad_norm": 0.8195966482162476, + "kl": 0.032697319984436035, + "learning_rate": 3.1277643236447934e-07, + "loss": 0.0, + "reward": 0.8571429252624512, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.8571429252624512, + "step": 442 + }, + { + "completion_length": 171.00001525878906, + "epoch": 0.3100069979006298, + "grad_norm": 1.1854536533355713, + "kl": 0.038218557834625244, + "learning_rate": 3.124130389438811e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 443 + }, + { + "completion_length": 164.85714721679688, + "epoch": 0.3107067879636109, + "grad_norm": 0.8803959488868713, + "kl": 0.036526720970869064, + "learning_rate": 3.1204910220815005e-07, + "loss": 0.0, + "reward": 0.9285714626312256, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.9285714626312256, + "step": 444 + }, + { + "completion_length": 207.07144165039062, + "epoch": 0.311406578026592, + "grad_norm": 0.8009771704673767, + "kl": 0.020504629239439964, + "learning_rate": 3.116846239162666e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.5, + "step": 445 + }, + { + "completion_length": 199.92857360839844, + "epoch": 0.31210636808957315, + "grad_norm": 1.1930276155471802, + "kl": 0.02396634966135025, + "learning_rate": 3.1131960582982876e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.5, + "step": 446 + }, + { + "completion_length": 201.92857360839844, + "epoch": 0.31280615815255425, + "grad_norm": 0.007053898181766272, + "kl": 0.02715187333524227, + "learning_rate": 3.109540497130433e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 447 + }, + { + "completion_length": 144.2857208251953, + "epoch": 0.31350594821553535, + "grad_norm": 1.8172719478607178, + "kl": 0.034259747713804245, + "learning_rate": 3.105879573327174e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.30304574966430664, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 448 + }, + { + "completion_length": 198.57144165039062, + "epoch": 0.31420573827851644, + "grad_norm": 0.8178118467330933, + "kl": 0.028386041522026062, + "learning_rate": 3.102213304582502e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 449 + }, + { + "completion_length": 198.1428680419922, + "epoch": 0.31490552834149754, + "grad_norm": 1.054382085800171, + "kl": 0.017203137278556824, + "learning_rate": 3.0985417086162417e-07, + "loss": 0.0, + "reward": 0.785714328289032, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.785714328289032, + "step": 450 + }, + { + "completion_length": 204.92857360839844, + "epoch": 0.31560531840447864, + "grad_norm": 0.8276398777961731, + "kl": 0.02506078965961933, + "learning_rate": 3.094864803173964e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.5, + "step": 451 + }, + { + "completion_length": 181.35714721679688, + "epoch": 0.31630510846745974, + "grad_norm": 1.2800688743591309, + "kl": 0.03069334104657173, + "learning_rate": 3.091182606026903e-07, + "loss": 0.0, + "reward": 0.4285714626312256, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.4285714626312256, + "step": 452 + }, + { + "completion_length": 183.2857208251953, + "epoch": 0.3170048985304409, + "grad_norm": 0.8573112487792969, + "kl": 0.0253884457051754, + "learning_rate": 3.087495134971867e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.5, + "step": 453 + }, + { + "completion_length": 198.85714721679688, + "epoch": 0.317704688593422, + "grad_norm": 0.004384016152471304, + "kl": 0.01863146387040615, + "learning_rate": 3.0838024078311577e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 454 + }, + { + "completion_length": 216.00001525878906, + "epoch": 0.3184044786564031, + "grad_norm": 0.4845581352710724, + "kl": 0.01092128548771143, + "learning_rate": 3.080104442452476e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 455 + }, + { + "completion_length": 204.00001525878906, + "epoch": 0.3191042687193842, + "grad_norm": 0.0038679074496030807, + "kl": 0.017811274155974388, + "learning_rate": 3.076401256708843e-07, + "loss": 0.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_gptzero_func": 1.0, + "step": 456 + }, + { + "completion_length": 182.07144165039062, + "epoch": 0.3198040587823653, + "grad_norm": 2.2464168071746826, + "kl": 0.04614328593015671, + "learning_rate": 3.0726928684985105e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 457 + }, + { + "completion_length": 181.50001525878906, + "epoch": 0.3205038488453464, + "grad_norm": 1.5309993028640747, + "kl": 0.022071899846196175, + "learning_rate": 3.0689792957448753e-07, + "loss": 0.0, + "reward": 0.3571428656578064, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.3571428656578064, + "step": 458 + }, + { + "completion_length": 159.57144165039062, + "epoch": 0.3212036389083275, + "grad_norm": 0.6382618546485901, + "kl": 0.04648633673787117, + "learning_rate": 3.0652605563963896e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 459 + }, + { + "completion_length": 165.85714721679688, + "epoch": 0.3219034289713086, + "grad_norm": 1.0912450551986694, + "kl": 0.026466449722647667, + "learning_rate": 3.061536668426481e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.5, + "step": 460 + }, + { + "completion_length": 196.1428680419922, + "epoch": 0.32260321903428973, + "grad_norm": 0.005231840070337057, + "kl": 0.026346756145358086, + "learning_rate": 3.057807649833457e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 461 + }, + { + "completion_length": 221.9285888671875, + "epoch": 0.32330300909727083, + "grad_norm": 1.4150323867797852, + "kl": 0.01162702776491642, + "learning_rate": 3.054073518640427e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 462 + }, + { + "completion_length": 183.35714721679688, + "epoch": 0.32400279916025193, + "grad_norm": 3.7010645866394043, + "kl": 0.03048822656273842, + "learning_rate": 3.050334292895207e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 463 + }, + { + "completion_length": 196.35714721679688, + "epoch": 0.32470258922323303, + "grad_norm": 0.765592634677887, + "kl": 0.019732153043150902, + "learning_rate": 3.0465899906702365e-07, + "loss": 0.0, + "reward": 0.785714328289032, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.785714328289032, + "step": 464 + }, + { + "completion_length": 197.50001525878906, + "epoch": 0.3254023792862141, + "grad_norm": 0.9544207453727722, + "kl": 0.023729894310235977, + "learning_rate": 3.042840630062493e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 465 + }, + { + "completion_length": 148.2857208251953, + "epoch": 0.3261021693491952, + "grad_norm": 2.0614309310913086, + "kl": 0.0456995889544487, + "learning_rate": 3.039086229193399e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 466 + }, + { + "completion_length": 193.1428680419922, + "epoch": 0.3268019594121763, + "grad_norm": 0.004235828295350075, + "kl": 0.02045871689915657, + "learning_rate": 3.035326806208741e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 467 + }, + { + "completion_length": 209.42857360839844, + "epoch": 0.3275017494751575, + "grad_norm": 0.8472065329551697, + "kl": 0.012107213959097862, + "learning_rate": 3.031562379278575e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 468 + }, + { + "completion_length": 208.00001525878906, + "epoch": 0.3282015395381386, + "grad_norm": 0.9307161569595337, + "kl": 0.03212505951523781, + "learning_rate": 3.0277929665971447e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 469 + }, + { + "completion_length": 184.1428680419922, + "epoch": 0.3289013296011197, + "grad_norm": 0.00613441364839673, + "kl": 0.029952503740787506, + "learning_rate": 3.02401858638279e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 470 + }, + { + "completion_length": 145.07144165039062, + "epoch": 0.3296011196641008, + "grad_norm": 1.3014689683914185, + "kl": 0.05368249490857124, + "learning_rate": 3.0202392568778593e-07, + "loss": 0.0001, + "reward": 0.785714328289032, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.785714328289032, + "step": 471 + }, + { + "completion_length": 205.2857208251953, + "epoch": 0.33030090972708187, + "grad_norm": 1.50192129611969, + "kl": 0.02523050829768181, + "learning_rate": 3.0164549963486235e-07, + "loss": 0.0, + "reward": 0.4285714626312256, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.4285714626312256, + "step": 472 + }, + { + "completion_length": 180.50001525878906, + "epoch": 0.33100069979006297, + "grad_norm": 0.8972246646881104, + "kl": 0.05419189855456352, + "learning_rate": 3.0126658230851845e-07, + "loss": 0.0001, + "reward": 0.785714328289032, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.785714328289032, + "step": 473 + }, + { + "completion_length": 184.85714721679688, + "epoch": 0.33170048985304407, + "grad_norm": 1.803112268447876, + "kl": 0.02427106536924839, + "learning_rate": 3.0088717554013884e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.30304574966430664, + "rewards/check_gptzero_func": 0.5, + "step": 474 + }, + { + "completion_length": 197.7857208251953, + "epoch": 0.33240027991602517, + "grad_norm": 1.4649418592453003, + "kl": 0.04181037098169327, + "learning_rate": 3.00507281163474e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 475 + }, + { + "completion_length": 190.00001525878906, + "epoch": 0.3331000699790063, + "grad_norm": 1.0860769748687744, + "kl": 0.033349357545375824, + "learning_rate": 3.001269010146306e-07, + "loss": 0.0, + "reward": 0.785714328289032, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.785714328289032, + "step": 476 + }, + { + "completion_length": 171.1428680419922, + "epoch": 0.3337998600419874, + "grad_norm": 1.4760380983352661, + "kl": 0.042727965861558914, + "learning_rate": 2.9974603693206366e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 477 + }, + { + "completion_length": 188.2857208251953, + "epoch": 0.3344996501049685, + "grad_norm": 0.5807764530181885, + "kl": 0.024857990443706512, + "learning_rate": 2.9936469075656683e-07, + "loss": 0.0, + "reward": 0.9285714626312256, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.9285714626312256, + "step": 478 + }, + { + "completion_length": 214.1428680419922, + "epoch": 0.3351994401679496, + "grad_norm": 1.7858057022094727, + "kl": 0.021253671497106552, + "learning_rate": 2.989828643312639e-07, + "loss": 0.0, + "reward": 0.785714328289032, + "reward_std": 0.30304574966430664, + "rewards/check_gptzero_func": 0.785714328289032, + "step": 479 + }, + { + "completion_length": 211.6428680419922, + "epoch": 0.3358992302309307, + "grad_norm": 1.2214895486831665, + "kl": 0.02429124526679516, + "learning_rate": 2.9860055950159994e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 480 + }, + { + "completion_length": 184.35714721679688, + "epoch": 0.3365990202939118, + "grad_norm": 0.008441203273832798, + "kl": 0.04451807960867882, + "learning_rate": 2.98217778115332e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 481 + }, + { + "completion_length": 132.5, + "epoch": 0.3372988103568929, + "grad_norm": 1.2302217483520508, + "kl": 0.038474034518003464, + "learning_rate": 2.9783452202252065e-07, + "loss": 0.0, + "reward": 0.4285714626312256, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.4285714626312256, + "step": 482 + }, + { + "completion_length": 195.92857360839844, + "epoch": 0.33799860041987406, + "grad_norm": 1.357618808746338, + "kl": 0.022870337590575218, + "learning_rate": 2.974507930755206e-07, + "loss": 0.0, + "reward": 0.3571428656578064, + "reward_std": 0.30304574966430664, + "rewards/check_gptzero_func": 0.3571428656578064, + "step": 483 + }, + { + "completion_length": 168.07144165039062, + "epoch": 0.33869839048285516, + "grad_norm": 0.007778555620461702, + "kl": 0.04222830384969711, + "learning_rate": 2.970665931289722e-07, + "loss": 0.0, + "reward": 0.8571429252624512, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.8571429252624512, + "step": 484 + }, + { + "completion_length": 199.21429443359375, + "epoch": 0.33939818054583626, + "grad_norm": 1.7007582187652588, + "kl": 0.0240781269967556, + "learning_rate": 2.9668192403979194e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 485 + }, + { + "completion_length": 176.57144165039062, + "epoch": 0.34009797060881736, + "grad_norm": 0.004922912456095219, + "kl": 0.03378324955701828, + "learning_rate": 2.9629678766716414e-07, + "loss": 0.0, + "reward": 0.2857142984867096, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.2857142984867096, + "step": 486 + }, + { + "completion_length": 181.6428680419922, + "epoch": 0.34079776067179846, + "grad_norm": 0.008107264526188374, + "kl": 0.02736920677125454, + "learning_rate": 2.9591118587253125e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 487 + }, + { + "completion_length": 185.2857208251953, + "epoch": 0.34149755073477955, + "grad_norm": 1.4481292963027954, + "kl": 0.027283810079097748, + "learning_rate": 2.9552512051958545e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.4040610194206238, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 488 + }, + { + "completion_length": 162.21429443359375, + "epoch": 0.34219734079776065, + "grad_norm": 2.112471103668213, + "kl": 0.04522133991122246, + "learning_rate": 2.951385934742592e-07, + "loss": 0.0, + "reward": 0.4285714626312256, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.4285714626312256, + "step": 489 + }, + { + "completion_length": 197.71429443359375, + "epoch": 0.34289713086074175, + "grad_norm": 1.134124755859375, + "kl": 0.028563635423779488, + "learning_rate": 2.947516066047166e-07, + "loss": 0.0, + "reward": 0.785714328289032, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.785714328289032, + "step": 490 + }, + { + "completion_length": 180.21429443359375, + "epoch": 0.3435969209237229, + "grad_norm": 0.8103861808776855, + "kl": 0.02636152133345604, + "learning_rate": 2.94364161781344e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 491 + }, + { + "completion_length": 153.1428680419922, + "epoch": 0.344296710986704, + "grad_norm": 1.6501652002334595, + "kl": 0.03942684456706047, + "learning_rate": 2.939762608767413e-07, + "loss": 0.0, + "reward": 0.785714328289032, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.785714328289032, + "step": 492 + }, + { + "completion_length": 146.2857208251953, + "epoch": 0.3449965010496851, + "grad_norm": 1.1731581687927246, + "kl": 0.04676751047372818, + "learning_rate": 2.9358790576571254e-07, + "loss": 0.0, + "reward": 0.785714328289032, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.785714328289032, + "step": 493 + }, + { + "completion_length": 184.1428680419922, + "epoch": 0.3456962911126662, + "grad_norm": 0.8028280138969421, + "kl": 0.028286494314670563, + "learning_rate": 2.9319909832525717e-07, + "loss": 0.0, + "reward": 0.9285714626312256, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.9285714626312256, + "step": 494 + }, + { + "completion_length": 130.85714721679688, + "epoch": 0.3463960811756473, + "grad_norm": 0.007896073162555695, + "kl": 0.04474465176463127, + "learning_rate": 2.9280984043456087e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 495 + }, + { + "completion_length": 235.07144165039062, + "epoch": 0.3470958712386284, + "grad_norm": 1.6852149963378906, + "kl": 0.01534675806760788, + "learning_rate": 2.9242013397498635e-07, + "loss": 0.0, + "reward": 0.3571428656578064, + "reward_std": 0.30304574966430664, + "rewards/check_gptzero_func": 0.3571428656578064, + "step": 496 + }, + { + "completion_length": 181.92857360839844, + "epoch": 0.3477956613016095, + "grad_norm": 2.4247560501098633, + "kl": 0.034807994961738586, + "learning_rate": 2.920299808300643e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 497 + }, + { + "completion_length": 201.2857208251953, + "epoch": 0.34849545136459065, + "grad_norm": 2.073784112930298, + "kl": 0.03788765147328377, + "learning_rate": 2.9163938288548445e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 498 + }, + { + "completion_length": 188.1428680419922, + "epoch": 0.34919524142757175, + "grad_norm": 2.0151796340942383, + "kl": 0.039097972214221954, + "learning_rate": 2.912483420290863e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 499 + }, + { + "completion_length": 183.50001525878906, + "epoch": 0.34989503149055284, + "grad_norm": 1.8998000621795654, + "kl": 0.038978077471256256, + "learning_rate": 2.9085686015085005e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 500 + }, + { + "completion_length": 156.21429443359375, + "epoch": 0.35059482155353394, + "grad_norm": 2.9568324089050293, + "kl": 0.05164632946252823, + "learning_rate": 2.904649391428874e-07, + "loss": 0.0001, + "reward": 0.8571429252624512, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.8571429252624512, + "step": 501 + }, + { + "completion_length": 180.85714721679688, + "epoch": 0.35129461161651504, + "grad_norm": 3.0119810104370117, + "kl": 0.034306906163692474, + "learning_rate": 2.9007258089943246e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.30304574966430664, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 502 + }, + { + "completion_length": 218.00001525878906, + "epoch": 0.35199440167949614, + "grad_norm": 0.7019025087356567, + "kl": 0.03303629904985428, + "learning_rate": 2.896797873168326e-07, + "loss": 0.0, + "reward": 0.785714328289032, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.785714328289032, + "step": 503 + }, + { + "completion_length": 213.6428680419922, + "epoch": 0.35269419174247724, + "grad_norm": 1.0243908166885376, + "kl": 0.029559114947915077, + "learning_rate": 2.892865602935393e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 504 + }, + { + "completion_length": 168.92857360839844, + "epoch": 0.35339398180545833, + "grad_norm": 2.033384323120117, + "kl": 0.05405454337596893, + "learning_rate": 2.8889290173009897e-07, + "loss": 0.0001, + "reward": 0.8571429252624512, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.8571429252624512, + "step": 505 + }, + { + "completion_length": 171.6428680419922, + "epoch": 0.3540937718684395, + "grad_norm": 0.8686595559120178, + "kl": 0.046510692685842514, + "learning_rate": 2.884988135291435e-07, + "loss": 0.0, + "reward": 0.785714328289032, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.785714328289032, + "step": 506 + }, + { + "completion_length": 170.57144165039062, + "epoch": 0.3547935619314206, + "grad_norm": 1.2175019979476929, + "kl": 0.05344880744814873, + "learning_rate": 2.881042975953817e-07, + "loss": 0.0001, + "reward": 0.9285714626312256, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.9285714626312256, + "step": 507 + }, + { + "completion_length": 194.6428680419922, + "epoch": 0.3554933519944017, + "grad_norm": 1.1815731525421143, + "kl": 0.03906374052166939, + "learning_rate": 2.8770935583558944e-07, + "loss": 0.0, + "reward": 0.8571429252624512, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.8571429252624512, + "step": 508 + }, + { + "completion_length": 173.35714721679688, + "epoch": 0.3561931420573828, + "grad_norm": 0.009414693340659142, + "kl": 0.038596801459789276, + "learning_rate": 2.8731399015860074e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 509 + }, + { + "completion_length": 158.07144165039062, + "epoch": 0.3568929321203639, + "grad_norm": 0.007976559922099113, + "kl": 0.05238325893878937, + "learning_rate": 2.869182024752986e-07, + "loss": 0.0001, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_gptzero_func": 1.0, + "step": 510 + }, + { + "completion_length": 190.35714721679688, + "epoch": 0.357592722183345, + "grad_norm": 1.144026279449463, + "kl": 0.03528675436973572, + "learning_rate": 2.865219946986054e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 511 + }, + { + "completion_length": 226.00001525878906, + "epoch": 0.3582925122463261, + "grad_norm": 1.22597336769104, + "kl": 0.015853432938456535, + "learning_rate": 2.8612536874347424e-07, + "loss": 0.0, + "reward": 0.8571429252624512, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.8571429252624512, + "step": 512 + }, + { + "completion_length": 206.00001525878906, + "epoch": 0.35899230230930723, + "grad_norm": 0.005469062831252813, + "kl": 0.020527929067611694, + "learning_rate": 2.8572832652687913e-07, + "loss": 0.0, + "reward": 0.8571429252624512, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.8571429252624512, + "step": 513 + }, + { + "completion_length": 184.35714721679688, + "epoch": 0.35969209237228833, + "grad_norm": 0.007139692083001137, + "kl": 0.039707526564598083, + "learning_rate": 2.853308699678061e-07, + "loss": 0.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_gptzero_func": 1.0, + "step": 514 + }, + { + "completion_length": 206.71429443359375, + "epoch": 0.36039188243526943, + "grad_norm": 0.8990456461906433, + "kl": 0.03507751226425171, + "learning_rate": 2.849330009872437e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.5, + "step": 515 + }, + { + "completion_length": 215.1428680419922, + "epoch": 0.3610916724982505, + "grad_norm": 1.0554438829421997, + "kl": 0.046111151576042175, + "learning_rate": 2.845347215081738e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.5, + "step": 516 + }, + { + "completion_length": 194.42857360839844, + "epoch": 0.3617914625612316, + "grad_norm": 1.4664751291275024, + "kl": 0.04037764295935631, + "learning_rate": 2.8413603345556234e-07, + "loss": 0.0, + "reward": 0.3571428656578064, + "reward_std": 0.30304574966430664, + "rewards/check_gptzero_func": 0.3571428656578064, + "step": 517 + }, + { + "completion_length": 171.21429443359375, + "epoch": 0.3624912526242127, + "grad_norm": 1.1242681741714478, + "kl": 0.0358518548309803, + "learning_rate": 2.837369387563499e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 518 + }, + { + "completion_length": 217.42857360839844, + "epoch": 0.3631910426871938, + "grad_norm": 2.206460952758789, + "kl": 0.02211880125105381, + "learning_rate": 2.8333743933944265e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.4040610194206238, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 519 + }, + { + "completion_length": 184.21429443359375, + "epoch": 0.363890832750175, + "grad_norm": 0.004289484582841396, + "kl": 0.02826312743127346, + "learning_rate": 2.829375371357025e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 520 + }, + { + "completion_length": 176.85714721679688, + "epoch": 0.3645906228131561, + "grad_norm": 0.006967071909457445, + "kl": 0.0346333347260952, + "learning_rate": 2.8253723407793853e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 521 + }, + { + "completion_length": 197.07144165039062, + "epoch": 0.36529041287613717, + "grad_norm": 2.3508365154266357, + "kl": 0.039042286574840546, + "learning_rate": 2.8213653210089685e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.30304574966430664, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 522 + }, + { + "completion_length": 174.35714721679688, + "epoch": 0.36599020293911827, + "grad_norm": 0.7139243483543396, + "kl": 0.03926414996385574, + "learning_rate": 2.8173543314125194e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 523 + }, + { + "completion_length": 215.35714721679688, + "epoch": 0.36668999300209937, + "grad_norm": 0.8621992468833923, + "kl": 0.019353993237018585, + "learning_rate": 2.813339391375968e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 524 + }, + { + "completion_length": 175.07144165039062, + "epoch": 0.36738978306508047, + "grad_norm": 2.0378146171569824, + "kl": 0.044867780059576035, + "learning_rate": 2.8093205203043373e-07, + "loss": 0.0, + "reward": 0.4285714626312256, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.4285714626312256, + "step": 525 + }, + { + "completion_length": 174.00001525878906, + "epoch": 0.36808957312806156, + "grad_norm": 2.014314651489258, + "kl": 0.04805855080485344, + "learning_rate": 2.8052977376216507e-07, + "loss": 0.0, + "reward": 0.9285714626312256, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.9285714626312256, + "step": 526 + }, + { + "completion_length": 171.71429443359375, + "epoch": 0.36878936319104266, + "grad_norm": 1.53948175907135, + "kl": 0.035063281655311584, + "learning_rate": 2.8012710627708374e-07, + "loss": 0.0, + "reward": 0.785714328289032, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.785714328289032, + "step": 527 + }, + { + "completion_length": 154.85714721679688, + "epoch": 0.3694891532540238, + "grad_norm": 0.8369784355163574, + "kl": 0.06181350722908974, + "learning_rate": 2.7972405152136376e-07, + "loss": 0.0001, + "reward": 0.2142857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.2142857313156128, + "step": 528 + }, + { + "completion_length": 187.2857208251953, + "epoch": 0.3701889433170049, + "grad_norm": 2.59834361076355, + "kl": 0.03680596500635147, + "learning_rate": 2.7932061144305084e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.30304574966430664, + "rewards/check_gptzero_func": 0.5, + "step": 529 + }, + { + "completion_length": 184.2857208251953, + "epoch": 0.370888733379986, + "grad_norm": 2.813354969024658, + "kl": 0.04346339777112007, + "learning_rate": 2.7891678799205325e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.4040610194206238, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 530 + }, + { + "completion_length": 198.71429443359375, + "epoch": 0.3715885234429671, + "grad_norm": 1.1584117412567139, + "kl": 0.02804117649793625, + "learning_rate": 2.78512583120132e-07, + "loss": 0.0, + "reward": 0.9285714626312256, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.9285714626312256, + "step": 531 + }, + { + "completion_length": 178.50001525878906, + "epoch": 0.3722883135059482, + "grad_norm": 1.3236713409423828, + "kl": 0.05426415801048279, + "learning_rate": 2.781079987808916e-07, + "loss": 0.0001, + "reward": 0.9285714626312256, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.9285714626312256, + "step": 532 + }, + { + "completion_length": 165.2857208251953, + "epoch": 0.3729881035689293, + "grad_norm": 2.2150092124938965, + "kl": 0.054815713316202164, + "learning_rate": 2.777030369297707e-07, + "loss": 0.0001, + "reward": 0.785714328289032, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.785714328289032, + "step": 533 + }, + { + "completion_length": 185.71429443359375, + "epoch": 0.3736878936319104, + "grad_norm": 1.8651477098464966, + "kl": 0.04314412921667099, + "learning_rate": 2.772976995240325e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 534 + }, + { + "completion_length": 196.1428680419922, + "epoch": 0.37438768369489156, + "grad_norm": 0.00770213408395648, + "kl": 0.037844039499759674, + "learning_rate": 2.768919885227551e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 535 + }, + { + "completion_length": 196.85714721679688, + "epoch": 0.37508747375787266, + "grad_norm": 2.5110208988189697, + "kl": 0.04185169190168381, + "learning_rate": 2.764859058868228e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 536 + }, + { + "completion_length": 191.35714721679688, + "epoch": 0.37578726382085376, + "grad_norm": 1.851997971534729, + "kl": 0.04296501353383064, + "learning_rate": 2.7607945357891546e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 537 + }, + { + "completion_length": 170.21429443359375, + "epoch": 0.37648705388383485, + "grad_norm": 2.525156259536743, + "kl": 0.05845579877495766, + "learning_rate": 2.7567263356350016e-07, + "loss": 0.0001, + "reward": 0.6428571939468384, + "reward_std": 0.30304574966430664, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 538 + }, + { + "completion_length": 192.57144165039062, + "epoch": 0.37718684394681595, + "grad_norm": 24.581480026245117, + "kl": 2.964402198791504, + "learning_rate": 2.752654478068208e-07, + "loss": 0.003, + "reward": 0.2857142984867096, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.2857142984867096, + "step": 539 + }, + { + "completion_length": 173.2857208251953, + "epoch": 0.37788663400979705, + "grad_norm": 1.9153430461883545, + "kl": 0.056273799389600754, + "learning_rate": 2.748578982768893e-07, + "loss": 0.0001, + "reward": 0.785714328289032, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.785714328289032, + "step": 540 + }, + { + "completion_length": 170.57144165039062, + "epoch": 0.37858642407277815, + "grad_norm": 1.2073723077774048, + "kl": 0.0511692650616169, + "learning_rate": 2.7444998694347546e-07, + "loss": 0.0001, + "reward": 0.9285714626312256, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.9285714626312256, + "step": 541 + }, + { + "completion_length": 184.21429443359375, + "epoch": 0.37928621413575925, + "grad_norm": 1.784035086631775, + "kl": 0.05286607891321182, + "learning_rate": 2.7404171577809803e-07, + "loss": 0.0001, + "reward": 0.785714328289032, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.785714328289032, + "step": 542 + }, + { + "completion_length": 188.92857360839844, + "epoch": 0.3799860041987404, + "grad_norm": 2.3592467308044434, + "kl": 0.03701862320303917, + "learning_rate": 2.736330867540147e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 543 + }, + { + "completion_length": 210.07144165039062, + "epoch": 0.3806857942617215, + "grad_norm": 0.7970978617668152, + "kl": 0.04706348478794098, + "learning_rate": 2.732241018462129e-07, + "loss": 0.0, + "reward": 0.785714328289032, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.785714328289032, + "step": 544 + }, + { + "completion_length": 193.85714721679688, + "epoch": 0.3813855843247026, + "grad_norm": 1.835613489151001, + "kl": 0.06348719447851181, + "learning_rate": 2.7281476303140013e-07, + "loss": 0.0001, + "reward": 0.5, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.5, + "step": 545 + }, + { + "completion_length": 189.92857360839844, + "epoch": 0.3820853743876837, + "grad_norm": 3.5407755374908447, + "kl": 0.05623449757695198, + "learning_rate": 2.724050722879941e-07, + "loss": 0.0001, + "reward": 0.785714328289032, + "reward_std": 0.30304574966430664, + "rewards/check_gptzero_func": 0.785714328289032, + "step": 546 + }, + { + "completion_length": 213.42857360839844, + "epoch": 0.3827851644506648, + "grad_norm": 1.8167250156402588, + "kl": 0.021622229367494583, + "learning_rate": 2.719950315961139e-07, + "loss": 0.0, + "reward": 0.785714328289032, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.785714328289032, + "step": 547 + }, + { + "completion_length": 195.07144165039062, + "epoch": 0.3834849545136459, + "grad_norm": 2.0906736850738525, + "kl": 0.05723336338996887, + "learning_rate": 2.715846429375697e-07, + "loss": 0.0001, + "reward": 0.7142857313156128, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 548 + }, + { + "completion_length": 182.21429443359375, + "epoch": 0.384184744576627, + "grad_norm": 3.0801124572753906, + "kl": 0.056581608951091766, + "learning_rate": 2.711739082958536e-07, + "loss": 0.0001, + "reward": 0.6428571939468384, + "reward_std": 0.30304574966430664, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 549 + }, + { + "completion_length": 167.1428680419922, + "epoch": 0.38488453463960814, + "grad_norm": 2.2385222911834717, + "kl": 0.08181693404912949, + "learning_rate": 2.7076282965612963e-07, + "loss": 0.0001, + "reward": 0.5714285969734192, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 550 + }, + { + "completion_length": 199.6428680419922, + "epoch": 0.38558432470258924, + "grad_norm": 2.1039581298828125, + "kl": 0.058807622641325, + "learning_rate": 2.7035140900522504e-07, + "loss": 0.0001, + "reward": 0.7142857313156128, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 551 + }, + { + "completion_length": 216.50001525878906, + "epoch": 0.38628411476557034, + "grad_norm": 2.4044368267059326, + "kl": 0.033771004527807236, + "learning_rate": 2.699396483316193e-07, + "loss": 0.0, + "reward": 0.4285714626312256, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.4285714626312256, + "step": 552 + }, + { + "completion_length": 188.85714721679688, + "epoch": 0.38698390482855144, + "grad_norm": 2.4988067150115967, + "kl": 0.07769946753978729, + "learning_rate": 2.69527549625436e-07, + "loss": 0.0001, + "reward": 0.8571429252624512, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.8571429252624512, + "step": 553 + }, + { + "completion_length": 181.1428680419922, + "epoch": 0.38768369489153254, + "grad_norm": 2.1368510723114014, + "kl": 0.054260946810245514, + "learning_rate": 2.691151148784321e-07, + "loss": 0.0001, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 554 + }, + { + "completion_length": 196.50001525878906, + "epoch": 0.38838348495451364, + "grad_norm": 2.683488130569458, + "kl": 0.03385968133807182, + "learning_rate": 2.687023460839887e-07, + "loss": 0.0, + "reward": 0.785714328289032, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.785714328289032, + "step": 555 + }, + { + "completion_length": 216.35714721679688, + "epoch": 0.38908327501749473, + "grad_norm": 0.009019927121698856, + "kl": 0.03483878821134567, + "learning_rate": 2.6828924523710166e-07, + "loss": 0.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_gptzero_func": 1.0, + "step": 556 + }, + { + "completion_length": 176.50001525878906, + "epoch": 0.38978306508047583, + "grad_norm": 1.9738969802856445, + "kl": 0.0592254213988781, + "learning_rate": 2.678758143343715e-07, + "loss": 0.0001, + "reward": 0.8571429252624512, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.8571429252624512, + "step": 557 + }, + { + "completion_length": 183.7857208251953, + "epoch": 0.390482855143457, + "grad_norm": 1.8640234470367432, + "kl": 0.052767064422369, + "learning_rate": 2.674620553739941e-07, + "loss": 0.0001, + "reward": 0.3571428656578064, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.3571428656578064, + "step": 558 + }, + { + "completion_length": 207.50001525878906, + "epoch": 0.3911826452064381, + "grad_norm": 0.012400349602103233, + "kl": 0.04630473628640175, + "learning_rate": 2.670479703557508e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 559 + }, + { + "completion_length": 184.1428680419922, + "epoch": 0.3918824352694192, + "grad_norm": 2.8871028423309326, + "kl": 0.056125763803720474, + "learning_rate": 2.66633561280999e-07, + "loss": 0.0001, + "reward": 0.5714285969734192, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 560 + }, + { + "completion_length": 177.21429443359375, + "epoch": 0.3925822253324003, + "grad_norm": 2.44193696975708, + "kl": 0.06930401176214218, + "learning_rate": 2.662188301526621e-07, + "loss": 0.0001, + "reward": 0.785714328289032, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.785714328289032, + "step": 561 + }, + { + "completion_length": 176.92857360839844, + "epoch": 0.3932820153953814, + "grad_norm": 3.4117348194122314, + "kl": 0.08207827061414719, + "learning_rate": 2.658037789752204e-07, + "loss": 0.0001, + "reward": 0.6428571939468384, + "reward_std": 0.30304574966430664, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 562 + }, + { + "completion_length": 180.92857360839844, + "epoch": 0.3939818054583625, + "grad_norm": 4.170308589935303, + "kl": 0.06983046233654022, + "learning_rate": 2.653884097547006e-07, + "loss": 0.0001, + "reward": 0.7142857313156128, + "reward_std": 0.4040610194206238, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 563 + }, + { + "completion_length": 167.0, + "epoch": 0.3946815955213436, + "grad_norm": 4.456571578979492, + "kl": 0.09263132512569427, + "learning_rate": 2.6497272449866704e-07, + "loss": 0.0001, + "reward": 0.7142857313156128, + "reward_std": 0.4040610194206238, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 564 + }, + { + "completion_length": 182.1428680419922, + "epoch": 0.39538138558432473, + "grad_norm": 2.575380563735962, + "kl": 0.051423076540231705, + "learning_rate": 2.645567252162111e-07, + "loss": 0.0001, + "reward": 0.4285714626312256, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.4285714626312256, + "step": 565 + }, + { + "completion_length": 178.57144165039062, + "epoch": 0.3960811756473058, + "grad_norm": 2.034536123275757, + "kl": 0.06636127829551697, + "learning_rate": 2.641404139179422e-07, + "loss": 0.0001, + "reward": 0.9285714626312256, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.9285714626312256, + "step": 566 + }, + { + "completion_length": 208.07144165039062, + "epoch": 0.3967809657102869, + "grad_norm": 1.6797178983688354, + "kl": 0.05298960953950882, + "learning_rate": 2.6372379261597784e-07, + "loss": 0.0001, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 567 + }, + { + "completion_length": 219.1428680419922, + "epoch": 0.397480755773268, + "grad_norm": 1.012208342552185, + "kl": 0.02927366830408573, + "learning_rate": 2.633068633239335e-07, + "loss": 0.0, + "reward": 0.785714328289032, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.785714328289032, + "step": 568 + }, + { + "completion_length": 168.1428680419922, + "epoch": 0.3981805458362491, + "grad_norm": 4.316521644592285, + "kl": 0.09156784415245056, + "learning_rate": 2.6288962805691354e-07, + "loss": 0.0001, + "reward": 0.785714328289032, + "reward_std": 0.30304574966430664, + "rewards/check_gptzero_func": 0.785714328289032, + "step": 569 + }, + { + "completion_length": 162.2857208251953, + "epoch": 0.3988803358992302, + "grad_norm": 3.2122459411621094, + "kl": 0.08237835764884949, + "learning_rate": 2.62472088831501e-07, + "loss": 0.0001, + "reward": 0.785714328289032, + "reward_std": 0.30304574966430664, + "rewards/check_gptzero_func": 0.785714328289032, + "step": 570 + }, + { + "completion_length": 177.7857208251953, + "epoch": 0.3995801259622113, + "grad_norm": 0.022715821862220764, + "kl": 0.08199223130941391, + "learning_rate": 2.620542476657482e-07, + "loss": 0.0001, + "reward": 0.8571429252624512, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.8571429252624512, + "step": 571 + }, + { + "completion_length": 167.1428680419922, + "epoch": 0.4002799160251924, + "grad_norm": 2.4406580924987793, + "kl": 0.0790218934416771, + "learning_rate": 2.616361065791665e-07, + "loss": 0.0001, + "reward": 0.8571429252624512, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.8571429252624512, + "step": 572 + }, + { + "completion_length": 156.07144165039062, + "epoch": 0.40097970608817357, + "grad_norm": 0.02388128452003002, + "kl": 0.09695427119731903, + "learning_rate": 2.6121766759271714e-07, + "loss": 0.0001, + "reward": 0.8571429252624512, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.8571429252624512, + "step": 573 + }, + { + "completion_length": 169.35714721679688, + "epoch": 0.40167949615115467, + "grad_norm": 2.7152810096740723, + "kl": 0.08760807663202286, + "learning_rate": 2.6079893272880096e-07, + "loss": 0.0001, + "reward": 0.7142857313156128, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 574 + }, + { + "completion_length": 172.1428680419922, + "epoch": 0.40237928621413577, + "grad_norm": 3.6040616035461426, + "kl": 0.0899227112531662, + "learning_rate": 2.60379904011249e-07, + "loss": 0.0001, + "reward": 0.6428571939468384, + "reward_std": 0.30304574966430664, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 575 + }, + { + "completion_length": 174.35714721679688, + "epoch": 0.40307907627711687, + "grad_norm": 2.5446419715881348, + "kl": 0.08045003563165665, + "learning_rate": 2.599605834653124e-07, + "loss": 0.0001, + "reward": 0.8571429252624512, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.8571429252624512, + "step": 576 + }, + { + "completion_length": 184.42857360839844, + "epoch": 0.40377886634009796, + "grad_norm": 2.3170862197875977, + "kl": 0.0715753585100174, + "learning_rate": 2.595409731176529e-07, + "loss": 0.0001, + "reward": 0.9285714626312256, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.9285714626312256, + "step": 577 + }, + { + "completion_length": 167.92857360839844, + "epoch": 0.40447865640307906, + "grad_norm": 3.1105737686157227, + "kl": 0.08440612256526947, + "learning_rate": 2.5912107499633276e-07, + "loss": 0.0001, + "reward": 0.7142857313156128, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 578 + }, + { + "completion_length": 223.4285888671875, + "epoch": 0.40517844646606016, + "grad_norm": 2.328325033187866, + "kl": 0.03954140096902847, + "learning_rate": 2.587008911308053e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 579 + }, + { + "completion_length": 208.42857360839844, + "epoch": 0.4058782365290413, + "grad_norm": 1.0618088245391846, + "kl": 0.050909895449876785, + "learning_rate": 2.582804235519047e-07, + "loss": 0.0001, + "reward": 0.785714328289032, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.785714328289032, + "step": 580 + }, + { + "completion_length": 193.2857208251953, + "epoch": 0.4065780265920224, + "grad_norm": 1.4221429824829102, + "kl": 0.03781288117170334, + "learning_rate": 2.578596742918365e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 581 + }, + { + "completion_length": 165.0, + "epoch": 0.4072778166550035, + "grad_norm": 1.6162134408950806, + "kl": 0.0694863349199295, + "learning_rate": 2.5743864538416773e-07, + "loss": 0.0001, + "reward": 0.3571428656578064, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.3571428656578064, + "step": 582 + }, + { + "completion_length": 194.35714721679688, + "epoch": 0.4079776067179846, + "grad_norm": 4.4587721824646, + "kl": 0.046782199293375015, + "learning_rate": 2.570173388638169e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.5050762891769409, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 583 + }, + { + "completion_length": 198.92857360839844, + "epoch": 0.4086773967809657, + "grad_norm": 1.545332431793213, + "kl": 0.05908704176545143, + "learning_rate": 2.565957567670442e-07, + "loss": 0.0001, + "reward": 0.785714328289032, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.785714328289032, + "step": 584 + }, + { + "completion_length": 187.57144165039062, + "epoch": 0.4093771868439468, + "grad_norm": 3.9582760334014893, + "kl": 0.05953739210963249, + "learning_rate": 2.5617390113144195e-07, + "loss": 0.0001, + "reward": 0.7142857313156128, + "reward_std": 0.4040610194206238, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 585 + }, + { + "completion_length": 187.7857208251953, + "epoch": 0.4100769769069279, + "grad_norm": 0.00880065280944109, + "kl": 0.046849362552165985, + "learning_rate": 2.557517739959244e-07, + "loss": 0.0, + "reward": 0.8571429252624512, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.8571429252624512, + "step": 586 + }, + { + "completion_length": 194.2857208251953, + "epoch": 0.410776766969909, + "grad_norm": 3.71048903465271, + "kl": 0.044254012405872345, + "learning_rate": 2.553293774007181e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.4040610194206238, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 587 + }, + { + "completion_length": 175.07144165039062, + "epoch": 0.41147655703289016, + "grad_norm": 2.1399154663085938, + "kl": 0.058250073343515396, + "learning_rate": 2.5490671338735175e-07, + "loss": 0.0001, + "reward": 0.785714328289032, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.785714328289032, + "step": 588 + }, + { + "completion_length": 224.71429443359375, + "epoch": 0.41217634709587125, + "grad_norm": 1.0939371585845947, + "kl": 0.02916034124791622, + "learning_rate": 2.544837839986468e-07, + "loss": 0.0, + "reward": 0.9285714626312256, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.9285714626312256, + "step": 589 + }, + { + "completion_length": 147.7857208251953, + "epoch": 0.41287613715885235, + "grad_norm": 2.57330060005188, + "kl": 0.07541820406913757, + "learning_rate": 2.5406059127870726e-07, + "loss": 0.0001, + "reward": 0.7142857313156128, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 590 + }, + { + "completion_length": 158.71429443359375, + "epoch": 0.41357592722183345, + "grad_norm": 2.0319535732269287, + "kl": 0.06947360932826996, + "learning_rate": 2.536371372729097e-07, + "loss": 0.0001, + "reward": 0.5714285969734192, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 591 + }, + { + "completion_length": 173.07144165039062, + "epoch": 0.41427571728481455, + "grad_norm": 2.184640884399414, + "kl": 0.05149083212018013, + "learning_rate": 2.532134240278937e-07, + "loss": 0.0001, + "reward": 0.5714285969734192, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 592 + }, + { + "completion_length": 186.6428680419922, + "epoch": 0.41497550734779565, + "grad_norm": 0.007053438574075699, + "kl": 0.04499982297420502, + "learning_rate": 2.5278945359155177e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 593 + }, + { + "completion_length": 154.71429443359375, + "epoch": 0.41567529741077675, + "grad_norm": 2.4145848751068115, + "kl": 0.05270431935787201, + "learning_rate": 2.523652280130194e-07, + "loss": 0.0001, + "reward": 0.5, + "reward_std": 0.30304574966430664, + "rewards/check_gptzero_func": 0.5, + "step": 594 + }, + { + "completion_length": 139.6428680419922, + "epoch": 0.4163750874737579, + "grad_norm": 1.4822930097579956, + "kl": 0.06764822453260422, + "learning_rate": 2.5194074934266536e-07, + "loss": 0.0001, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 595 + }, + { + "completion_length": 154.21429443359375, + "epoch": 0.417074877536739, + "grad_norm": 2.3014943599700928, + "kl": 0.06624924391508102, + "learning_rate": 2.515160196320815e-07, + "loss": 0.0001, + "reward": 0.5714285969734192, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 596 + }, + { + "completion_length": 179.42857360839844, + "epoch": 0.4177746675997201, + "grad_norm": 1.316183090209961, + "kl": 0.04376129060983658, + "learning_rate": 2.510910409340732e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 597 + }, + { + "completion_length": 190.1428680419922, + "epoch": 0.4184744576627012, + "grad_norm": 2.9795424938201904, + "kl": 0.05925116315484047, + "learning_rate": 2.5066581530264897e-07, + "loss": 0.0001, + "reward": 0.8571429252624512, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.8571429252624512, + "step": 598 + }, + { + "completion_length": 181.2857208251953, + "epoch": 0.4191742477256823, + "grad_norm": 3.705047130584717, + "kl": 0.05754886567592621, + "learning_rate": 2.5024034479301117e-07, + "loss": 0.0001, + "reward": 0.6428571939468384, + "reward_std": 0.30304574966430664, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 599 + }, + { + "completion_length": 212.21429443359375, + "epoch": 0.4198740377886634, + "grad_norm": 1.580612301826477, + "kl": 0.030571185052394867, + "learning_rate": 2.498146314615454e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 600 + }, + { + "completion_length": 178.42857360839844, + "epoch": 0.4205738278516445, + "grad_norm": 0.00882403552532196, + "kl": 0.05023515596985817, + "learning_rate": 2.493886773658111e-07, + "loss": 0.0001, + "reward": 0.8571429252624512, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.8571429252624512, + "step": 601 + }, + { + "completion_length": 154.57144165039062, + "epoch": 0.4212736179146256, + "grad_norm": 1.8183329105377197, + "kl": 0.051789525896310806, + "learning_rate": 2.48962484564531e-07, + "loss": 0.0001, + "reward": 0.785714328289032, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.785714328289032, + "step": 602 + }, + { + "completion_length": 204.92857360839844, + "epoch": 0.42197340797760674, + "grad_norm": 1.2268946170806885, + "kl": 0.04350917041301727, + "learning_rate": 2.485360551175819e-07, + "loss": 0.0, + "reward": 0.9285714626312256, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.9285714626312256, + "step": 603 + }, + { + "completion_length": 153.57144165039062, + "epoch": 0.42267319804058784, + "grad_norm": 2.0239200592041016, + "kl": 0.05288690701127052, + "learning_rate": 2.481093910859844e-07, + "loss": 0.0001, + "reward": 0.8571429252624512, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.8571429252624512, + "step": 604 + }, + { + "completion_length": 158.85714721679688, + "epoch": 0.42337298810356894, + "grad_norm": 1.8971515893936157, + "kl": 0.06349381804466248, + "learning_rate": 2.4768249453189254e-07, + "loss": 0.0001, + "reward": 0.5, + "reward_std": 0.30304574966430664, + "rewards/check_gptzero_func": 0.5, + "step": 605 + }, + { + "completion_length": 190.92857360839844, + "epoch": 0.42407277816655004, + "grad_norm": 2.755600690841675, + "kl": 0.03697388619184494, + "learning_rate": 2.4725536751858447e-07, + "loss": 0.0, + "reward": 0.785714328289032, + "reward_std": 0.30304574966430664, + "rewards/check_gptzero_func": 0.785714328289032, + "step": 606 + }, + { + "completion_length": 181.57144165039062, + "epoch": 0.42477256822953113, + "grad_norm": 0.006492472253739834, + "kl": 0.043367899954319, + "learning_rate": 2.468280121104521e-07, + "loss": 0.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_gptzero_func": 1.0, + "step": 607 + }, + { + "completion_length": 217.00001525878906, + "epoch": 0.42547235829251223, + "grad_norm": 1.355871558189392, + "kl": 0.0219882819801569, + "learning_rate": 2.4640043037299134e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 608 + }, + { + "completion_length": 214.07144165039062, + "epoch": 0.42617214835549333, + "grad_norm": 0.8965709805488586, + "kl": 0.050184134393930435, + "learning_rate": 2.4597262437279166e-07, + "loss": 0.0001, + "reward": 0.9285714626312256, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.9285714626312256, + "step": 609 + }, + { + "completion_length": 220.21429443359375, + "epoch": 0.4268719384184745, + "grad_norm": 1.6475117206573486, + "kl": 0.032147444784641266, + "learning_rate": 2.455445961775269e-07, + "loss": 0.0, + "reward": 0.8571429252624512, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.8571429252624512, + "step": 610 + }, + { + "completion_length": 191.1428680419922, + "epoch": 0.4275717284814556, + "grad_norm": 3.3601479530334473, + "kl": 0.03844405338168144, + "learning_rate": 2.4511634785594437e-07, + "loss": 0.0, + "reward": 0.785714328289032, + "reward_std": 0.30304574966430664, + "rewards/check_gptzero_func": 0.785714328289032, + "step": 611 + }, + { + "completion_length": 207.85714721679688, + "epoch": 0.4282715185444367, + "grad_norm": 1.5599465370178223, + "kl": 0.03253539651632309, + "learning_rate": 2.4468788147785575e-07, + "loss": 0.0, + "reward": 0.785714328289032, + "reward_std": 0.30304574966430664, + "rewards/check_gptzero_func": 0.785714328289032, + "step": 612 + }, + { + "completion_length": 202.57144165039062, + "epoch": 0.4289713086074178, + "grad_norm": 2.3960530757904053, + "kl": 0.04084521159529686, + "learning_rate": 2.4425919911412615e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 613 + }, + { + "completion_length": 166.85714721679688, + "epoch": 0.4296710986703989, + "grad_norm": 1.580222725868225, + "kl": 0.044861774891614914, + "learning_rate": 2.43830302836665e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 614 + }, + { + "completion_length": 226.9285888671875, + "epoch": 0.43037088873338, + "grad_norm": 2.250927209854126, + "kl": 0.030000748112797737, + "learning_rate": 2.4340119471841535e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.30304574966430664, + "rewards/check_gptzero_func": 0.5, + "step": 615 + }, + { + "completion_length": 183.2857208251953, + "epoch": 0.4310706787963611, + "grad_norm": 0.007283986546099186, + "kl": 0.05382065102458, + "learning_rate": 2.429718768333443e-07, + "loss": 0.0001, + "reward": 0.7142857313156128, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 616 + }, + { + "completion_length": 172.35714721679688, + "epoch": 0.43177046885934217, + "grad_norm": 2.248180866241455, + "kl": 0.04193298891186714, + "learning_rate": 2.4254235125643256e-07, + "loss": 0.0, + "reward": 0.4285714626312256, + "reward_std": 0.4040610194206238, + "rewards/check_gptzero_func": 0.4285714626312256, + "step": 617 + }, + { + "completion_length": 178.85714721679688, + "epoch": 0.4324702589223233, + "grad_norm": 0.005598884075880051, + "kl": 0.039547648280858994, + "learning_rate": 2.4211262006366487e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 618 + }, + { + "completion_length": 194.7857208251953, + "epoch": 0.4331700489853044, + "grad_norm": 0.948202908039093, + "kl": 0.034319400787353516, + "learning_rate": 2.4168268533201974e-07, + "loss": 0.0, + "reward": 0.785714328289032, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.785714328289032, + "step": 619 + }, + { + "completion_length": 191.2857208251953, + "epoch": 0.4338698390482855, + "grad_norm": 1.6938560009002686, + "kl": 0.04942072555422783, + "learning_rate": 2.412525491394593e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 620 + }, + { + "completion_length": 156.0, + "epoch": 0.4345696291112666, + "grad_norm": 1.8129706382751465, + "kl": 0.048300568014383316, + "learning_rate": 2.4082221356491945e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 621 + }, + { + "completion_length": 187.7857208251953, + "epoch": 0.4352694191742477, + "grad_norm": 1.6512104272842407, + "kl": 0.03514597937464714, + "learning_rate": 2.403916806882998e-07, + "loss": 0.0, + "reward": 0.785714328289032, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.785714328289032, + "step": 622 + }, + { + "completion_length": 227.57144165039062, + "epoch": 0.4359692092372288, + "grad_norm": 1.0237085819244385, + "kl": 0.02763524278998375, + "learning_rate": 2.399609525904536e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 623 + }, + { + "completion_length": 179.35714721679688, + "epoch": 0.4366689993002099, + "grad_norm": 2.3410961627960205, + "kl": 0.06135363504290581, + "learning_rate": 2.3953003135317725e-07, + "loss": 0.0001, + "reward": 0.3571428656578064, + "reward_std": 0.30304574966430664, + "rewards/check_gptzero_func": 0.3571428656578064, + "step": 624 + }, + { + "completion_length": 164.5, + "epoch": 0.43736878936319107, + "grad_norm": 2.9406211376190186, + "kl": 0.05737534910440445, + "learning_rate": 2.3909891905920116e-07, + "loss": 0.0001, + "reward": 0.9285714626312256, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.9285714626312256, + "step": 625 + }, + { + "completion_length": 230.57144165039062, + "epoch": 0.43806857942617217, + "grad_norm": 1.5693167448043823, + "kl": 0.03000093251466751, + "learning_rate": 2.386676177921789e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 626 + }, + { + "completion_length": 174.6428680419922, + "epoch": 0.43876836948915326, + "grad_norm": 0.006668443791568279, + "kl": 0.0467531643807888, + "learning_rate": 2.3823612963667748e-07, + "loss": 0.0, + "reward": 0.8571429252624512, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.8571429252624512, + "step": 627 + }, + { + "completion_length": 195.6428680419922, + "epoch": 0.43946815955213436, + "grad_norm": 1.811854600906372, + "kl": 0.034590449184179306, + "learning_rate": 2.3780445667816697e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 628 + }, + { + "completion_length": 201.71429443359375, + "epoch": 0.44016794961511546, + "grad_norm": 0.00650835270062089, + "kl": 0.03414328768849373, + "learning_rate": 2.3737260100301086e-07, + "loss": 0.0, + "reward": 0.8571429252624512, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.8571429252624512, + "step": 629 + }, + { + "completion_length": 217.6428680419922, + "epoch": 0.44086773967809656, + "grad_norm": 1.516406774520874, + "kl": 0.02930428460240364, + "learning_rate": 2.369405646984556e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 630 + }, + { + "completion_length": 172.35714721679688, + "epoch": 0.44156752974107766, + "grad_norm": 2.1428630352020264, + "kl": 0.03527773916721344, + "learning_rate": 2.3650834985262083e-07, + "loss": 0.0, + "reward": 0.785714328289032, + "reward_std": 0.30304574966430664, + "rewards/check_gptzero_func": 0.785714328289032, + "step": 631 + }, + { + "completion_length": 213.6428680419922, + "epoch": 0.44226731980405876, + "grad_norm": 1.0643141269683838, + "kl": 0.032824594527482986, + "learning_rate": 2.3607595855448893e-07, + "loss": 0.0, + "reward": 0.3571428656578064, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.3571428656578064, + "step": 632 + }, + { + "completion_length": 204.92857360839844, + "epoch": 0.4429671098670399, + "grad_norm": 2.404583215713501, + "kl": 0.027230154722929, + "learning_rate": 2.3564339289389513e-07, + "loss": 0.0, + "reward": 0.4285714626312256, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.4285714626312256, + "step": 633 + }, + { + "completion_length": 169.92857360839844, + "epoch": 0.443666899930021, + "grad_norm": 1.9238661527633667, + "kl": 0.041090719401836395, + "learning_rate": 2.3521065496151765e-07, + "loss": 0.0, + "reward": 0.8571429252624512, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.8571429252624512, + "step": 634 + }, + { + "completion_length": 210.1428680419922, + "epoch": 0.4443666899930021, + "grad_norm": 0.8436352610588074, + "kl": 0.0280954260379076, + "learning_rate": 2.347777468488669e-07, + "loss": 0.0, + "reward": 0.785714328289032, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.785714328289032, + "step": 635 + }, + { + "completion_length": 216.57144165039062, + "epoch": 0.4450664800559832, + "grad_norm": 0.0065932204015553, + "kl": 0.02991357073187828, + "learning_rate": 2.3434467064827616e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 636 + }, + { + "completion_length": 197.21429443359375, + "epoch": 0.4457662701189643, + "grad_norm": 1.2168712615966797, + "kl": 0.036863137036561966, + "learning_rate": 2.3391142845289097e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.5, + "step": 637 + }, + { + "completion_length": 159.71429443359375, + "epoch": 0.4464660601819454, + "grad_norm": 0.009670078754425049, + "kl": 0.05564764887094498, + "learning_rate": 2.334780223566592e-07, + "loss": 0.0001, + "reward": 0.8571429252624512, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.8571429252624512, + "step": 638 + }, + { + "completion_length": 166.7857208251953, + "epoch": 0.4471658502449265, + "grad_norm": 2.02837872505188, + "kl": 0.050884928554296494, + "learning_rate": 2.3304445445432077e-07, + "loss": 0.0001, + "reward": 0.6428571939468384, + "reward_std": 0.30304574966430664, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 639 + }, + { + "completion_length": 171.85714721679688, + "epoch": 0.44786564030790765, + "grad_norm": 0.010904277674853802, + "kl": 0.05013133957982063, + "learning_rate": 2.3261072684139785e-07, + "loss": 0.0001, + "reward": 0.7142857313156128, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 640 + }, + { + "completion_length": 187.6428680419922, + "epoch": 0.44856543037088875, + "grad_norm": 0.007786628790199757, + "kl": 0.044351689517498016, + "learning_rate": 2.3217684161418436e-07, + "loss": 0.0, + "reward": 0.8571429252624512, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.8571429252624512, + "step": 641 + }, + { + "completion_length": 147.42857360839844, + "epoch": 0.44926522043386985, + "grad_norm": 2.2840495109558105, + "kl": 0.04322770982980728, + "learning_rate": 2.3174280086973605e-07, + "loss": 0.0, + "reward": 0.5, + "reward_std": 0.30304574966430664, + "rewards/check_gptzero_func": 0.5, + "step": 642 + }, + { + "completion_length": 198.85714721679688, + "epoch": 0.44996501049685095, + "grad_norm": 1.339922308921814, + "kl": 0.04232291132211685, + "learning_rate": 2.3130860670586032e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 643 + }, + { + "completion_length": 198.71429443359375, + "epoch": 0.45066480055983205, + "grad_norm": 1.6301075220108032, + "kl": 0.02942308969795704, + "learning_rate": 2.308742612211061e-07, + "loss": 0.0, + "reward": 0.8571429252624512, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.8571429252624512, + "step": 644 + }, + { + "completion_length": 161.07144165039062, + "epoch": 0.45136459062281314, + "grad_norm": 1.520746111869812, + "kl": 0.048625752329826355, + "learning_rate": 2.3043976651475366e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 645 + }, + { + "completion_length": 206.71429443359375, + "epoch": 0.45206438068579424, + "grad_norm": 1.8411654233932495, + "kl": 0.03329163417220116, + "learning_rate": 2.300051246868044e-07, + "loss": 0.0, + "reward": 0.8571429252624512, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.8571429252624512, + "step": 646 + }, + { + "completion_length": 196.07144165039062, + "epoch": 0.45276417074877534, + "grad_norm": 1.7877939939498901, + "kl": 0.0342370830476284, + "learning_rate": 2.2957033783797098e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 647 + }, + { + "completion_length": 167.57144165039062, + "epoch": 0.4534639608117565, + "grad_norm": 1.2771666049957275, + "kl": 0.04394586384296417, + "learning_rate": 2.2913540806966676e-07, + "loss": 0.0, + "reward": 0.8571429252624512, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.8571429252624512, + "step": 648 + }, + { + "completion_length": 172.92857360839844, + "epoch": 0.4541637508747376, + "grad_norm": 1.193971872329712, + "kl": 0.046529728919267654, + "learning_rate": 2.2870033748399612e-07, + "loss": 0.0, + "reward": 0.785714328289032, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.785714328289032, + "step": 649 + }, + { + "completion_length": 178.1428680419922, + "epoch": 0.4548635409377187, + "grad_norm": 2.165350914001465, + "kl": 0.048747967928647995, + "learning_rate": 2.2826512818374381e-07, + "loss": 0.0, + "reward": 0.8571429252624512, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.8571429252624512, + "step": 650 + }, + { + "completion_length": 198.71429443359375, + "epoch": 0.4555633310006998, + "grad_norm": 2.628448247909546, + "kl": 0.053916797041893005, + "learning_rate": 2.278297822723651e-07, + "loss": 0.0001, + "reward": 0.6428571939468384, + "reward_std": 0.30304574966430664, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 651 + }, + { + "completion_length": 169.35714721679688, + "epoch": 0.4562631210636809, + "grad_norm": 2.885871648788452, + "kl": 0.0751948207616806, + "learning_rate": 2.273943018539755e-07, + "loss": 0.0001, + "reward": 0.9285714626312256, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.9285714626312256, + "step": 652 + }, + { + "completion_length": 162.57144165039062, + "epoch": 0.456962911126662, + "grad_norm": 0.007941286079585552, + "kl": 0.047953445464372635, + "learning_rate": 2.2695868903334072e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 653 + }, + { + "completion_length": 192.6428680419922, + "epoch": 0.4576627011896431, + "grad_norm": 0.00795914325863123, + "kl": 0.048438169062137604, + "learning_rate": 2.2652294591586621e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 654 + }, + { + "completion_length": 194.50001525878906, + "epoch": 0.45836249125262424, + "grad_norm": 1.705178141593933, + "kl": 0.03709828481078148, + "learning_rate": 2.260870746075874e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 655 + }, + { + "completion_length": 140.57144165039062, + "epoch": 0.45906228131560534, + "grad_norm": 1.660890817642212, + "kl": 0.0642286092042923, + "learning_rate": 2.256510772151591e-07, + "loss": 0.0001, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 656 + }, + { + "completion_length": 199.2857208251953, + "epoch": 0.45976207137858643, + "grad_norm": 0.007773585617542267, + "kl": 0.045362599194049835, + "learning_rate": 2.2521495584584564e-07, + "loss": 0.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_gptzero_func": 1.0, + "step": 657 + }, + { + "completion_length": 224.35714721679688, + "epoch": 0.46046186144156753, + "grad_norm": 0.006898650899529457, + "kl": 0.03265133500099182, + "learning_rate": 2.2477871260751047e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 658 + }, + { + "completion_length": 217.00001525878906, + "epoch": 0.46116165150454863, + "grad_norm": 2.767052173614502, + "kl": 0.04190690070390701, + "learning_rate": 2.2434234960860604e-07, + "loss": 0.0, + "reward": 0.4285714626312256, + "reward_std": 0.4040610194206238, + "rewards/check_gptzero_func": 0.4285714626312256, + "step": 659 + }, + { + "completion_length": 182.21429443359375, + "epoch": 0.46186144156752973, + "grad_norm": 1.0015095472335815, + "kl": 0.04846643656492233, + "learning_rate": 2.239058689581638e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 660 + }, + { + "completion_length": 194.6428680419922, + "epoch": 0.4625612316305108, + "grad_norm": 2.769559383392334, + "kl": 0.04055805504322052, + "learning_rate": 2.234692727657836e-07, + "loss": 0.0, + "reward": 0.785714328289032, + "reward_std": 0.30304574966430664, + "rewards/check_gptzero_func": 0.785714328289032, + "step": 661 + }, + { + "completion_length": 128.1428680419922, + "epoch": 0.4632610216934919, + "grad_norm": 1.6715753078460693, + "kl": 0.07706505060195923, + "learning_rate": 2.230325631416239e-07, + "loss": 0.0001, + "reward": 0.9285714626312256, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.9285714626312256, + "step": 662 + }, + { + "completion_length": 197.57144165039062, + "epoch": 0.4639608117564731, + "grad_norm": 2.4168155193328857, + "kl": 0.04533996060490608, + "learning_rate": 2.2259574219639124e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 663 + }, + { + "completion_length": 194.35714721679688, + "epoch": 0.4646606018194542, + "grad_norm": 0.8889102935791016, + "kl": 0.03331875056028366, + "learning_rate": 2.2215881204133047e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 664 + }, + { + "completion_length": 181.6428680419922, + "epoch": 0.4653603918824353, + "grad_norm": 1.8534014225006104, + "kl": 0.03304433822631836, + "learning_rate": 2.2172177478821395e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 665 + }, + { + "completion_length": 222.4285888671875, + "epoch": 0.4660601819454164, + "grad_norm": 1.8191381692886353, + "kl": 0.028013775125145912, + "learning_rate": 2.2128463254933186e-07, + "loss": 0.0, + "reward": 0.8571429252624512, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.8571429252624512, + "step": 666 + }, + { + "completion_length": 193.00001525878906, + "epoch": 0.4667599720083975, + "grad_norm": 1.7169095277786255, + "kl": 0.022591177374124527, + "learning_rate": 2.2084738743748174e-07, + "loss": 0.0, + "reward": 0.7142857313156128, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 667 + }, + { + "completion_length": 189.7857208251953, + "epoch": 0.46745976207137857, + "grad_norm": 0.011743742041289806, + "kl": 0.05124860256910324, + "learning_rate": 2.2041004156595845e-07, + "loss": 0.0001, + "reward": 0.4285714626312256, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.4285714626312256, + "step": 668 + }, + { + "completion_length": 194.7857208251953, + "epoch": 0.46815955213435967, + "grad_norm": 1.3765090703964233, + "kl": 0.04434319958090782, + "learning_rate": 2.199725970485436e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 669 + }, + { + "completion_length": 147.42857360839844, + "epoch": 0.4688593421973408, + "grad_norm": 1.9276536703109741, + "kl": 0.06591746211051941, + "learning_rate": 2.1953505599949573e-07, + "loss": 0.0001, + "reward": 0.6428571939468384, + "reward_std": 0.30304574966430664, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 670 + }, + { + "completion_length": 175.21429443359375, + "epoch": 0.4695591322603219, + "grad_norm": 2.328869581222534, + "kl": 0.06332084536552429, + "learning_rate": 2.1909742053354003e-07, + "loss": 0.0001, + "reward": 0.5, + "reward_std": 0.30304574966430664, + "rewards/check_gptzero_func": 0.5, + "step": 671 + }, + { + "completion_length": 211.07144165039062, + "epoch": 0.470258922323303, + "grad_norm": 0.9379087686538696, + "kl": 0.03213494271039963, + "learning_rate": 2.1865969276585786e-07, + "loss": 0.0, + "reward": 0.785714328289032, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.785714328289032, + "step": 672 + }, + { + "completion_length": 167.35714721679688, + "epoch": 0.4709587123862841, + "grad_norm": 0.009628983214497566, + "kl": 0.06781083345413208, + "learning_rate": 2.1822187481207672e-07, + "loss": 0.0001, + "reward": 0.8571429252624512, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.8571429252624512, + "step": 673 + }, + { + "completion_length": 186.1428680419922, + "epoch": 0.4716585024492652, + "grad_norm": 1.3198316097259521, + "kl": 0.0385563038289547, + "learning_rate": 2.1778396878826006e-07, + "loss": 0.0, + "reward": 0.9285714626312256, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.9285714626312256, + "step": 674 + }, + { + "completion_length": 172.92857360839844, + "epoch": 0.4723582925122463, + "grad_norm": 0.007041162345558405, + "kl": 0.052339326590299606, + "learning_rate": 2.17345976810897e-07, + "loss": 0.0001, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_gptzero_func": 1.0, + "step": 675 + }, + { + "completion_length": 214.00001525878906, + "epoch": 0.4730580825752274, + "grad_norm": 1.0516942739486694, + "kl": 0.03277459740638733, + "learning_rate": 2.1690790099689193e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 676 + }, + { + "completion_length": 212.2857208251953, + "epoch": 0.4737578726382085, + "grad_norm": 0.0056027439422905445, + "kl": 0.038574155420064926, + "learning_rate": 2.164697434635547e-07, + "loss": 0.0, + "reward": 0.8571429252624512, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.8571429252624512, + "step": 677 + }, + { + "completion_length": 171.6428680419922, + "epoch": 0.47445766270118966, + "grad_norm": 1.6738466024398804, + "kl": 0.05795716121792793, + "learning_rate": 2.1603150632858983e-07, + "loss": 0.0001, + "reward": 0.8571429252624512, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.8571429252624512, + "step": 678 + }, + { + "completion_length": 172.07144165039062, + "epoch": 0.47515745276417076, + "grad_norm": 1.538047432899475, + "kl": 0.040635015815496445, + "learning_rate": 2.1559319171008696e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 679 + }, + { + "completion_length": 197.50001525878906, + "epoch": 0.47585724282715186, + "grad_norm": 1.2881625890731812, + "kl": 0.03642135113477707, + "learning_rate": 2.151548017265098e-07, + "loss": 0.0, + "reward": 0.8571429252624512, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.8571429252624512, + "step": 680 + }, + { + "completion_length": 214.2857208251953, + "epoch": 0.47655703289013296, + "grad_norm": 0.8933217525482178, + "kl": 0.031822219491004944, + "learning_rate": 2.1471633849668663e-07, + "loss": 0.0, + "reward": 0.9285714626312256, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.9285714626312256, + "step": 681 + }, + { + "completion_length": 199.21429443359375, + "epoch": 0.47725682295311406, + "grad_norm": 1.5765355825424194, + "kl": 0.032428424805402756, + "learning_rate": 2.142778041397995e-07, + "loss": 0.0, + "reward": 0.785714328289032, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.785714328289032, + "step": 682 + }, + { + "completion_length": 168.85714721679688, + "epoch": 0.47795661301609516, + "grad_norm": 1.352317214012146, + "kl": 0.05361265316605568, + "learning_rate": 2.1383920077537443e-07, + "loss": 0.0001, + "reward": 0.8571429252624512, + "reward_std": 0.2020305097103119, + "rewards/check_gptzero_func": 0.8571429252624512, + "step": 683 + }, + { + "completion_length": 166.71429443359375, + "epoch": 0.47865640307907625, + "grad_norm": 1.072190761566162, + "kl": 0.04521774500608444, + "learning_rate": 2.1340053052327084e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 684 + }, + { + "completion_length": 223.6428680419922, + "epoch": 0.4793561931420574, + "grad_norm": 1.0753237009048462, + "kl": 0.0425286665558815, + "learning_rate": 2.1296179550367151e-07, + "loss": 0.0, + "reward": 0.785714328289032, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.785714328289032, + "step": 685 + }, + { + "completion_length": 193.00001525878906, + "epoch": 0.4800559832050385, + "grad_norm": 1.2362627983093262, + "kl": 0.044981617480516434, + "learning_rate": 2.125229978370723e-07, + "loss": 0.0, + "reward": 0.785714328289032, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.785714328289032, + "step": 686 + }, + { + "completion_length": 197.71429443359375, + "epoch": 0.4807557732680196, + "grad_norm": 0.008858474902808666, + "kl": 0.05235905200242996, + "learning_rate": 2.1208413964427167e-07, + "loss": 0.0001, + "reward": 0.7142857313156128, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 687 + }, + { + "completion_length": 170.7857208251953, + "epoch": 0.4814555633310007, + "grad_norm": 0.9643062949180603, + "kl": 0.057162944227457047, + "learning_rate": 2.116452230463608e-07, + "loss": 0.0001, + "reward": 0.785714328289032, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.785714328289032, + "step": 688 + }, + { + "completion_length": 161.5, + "epoch": 0.4821553533939818, + "grad_norm": 2.4657928943634033, + "kl": 0.073735311627388, + "learning_rate": 2.11206250164713e-07, + "loss": 0.0001, + "reward": 0.9285714626312256, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.9285714626312256, + "step": 689 + }, + { + "completion_length": 161.5, + "epoch": 0.4828551434569629, + "grad_norm": 0.010531275533139706, + "kl": 0.06991841644048691, + "learning_rate": 2.107672231209738e-07, + "loss": 0.0001, + "reward": 0.7142857313156128, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 690 + }, + { + "completion_length": 180.6428680419922, + "epoch": 0.483554933519944, + "grad_norm": 1.2783242464065552, + "kl": 0.05982107296586037, + "learning_rate": 2.1032814403705025e-07, + "loss": 0.0001, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 691 + }, + { + "completion_length": 218.00001525878906, + "epoch": 0.4842547235829251, + "grad_norm": 0.006291645113378763, + "kl": 0.03857533633708954, + "learning_rate": 2.098890150351013e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 692 + }, + { + "completion_length": 185.50001525878906, + "epoch": 0.48495451364590625, + "grad_norm": 0.006465044338256121, + "kl": 0.04398878663778305, + "learning_rate": 2.0944983823752663e-07, + "loss": 0.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_gptzero_func": 1.0, + "step": 693 + }, + { + "completion_length": 194.00001525878906, + "epoch": 0.48565430370888735, + "grad_norm": 1.1355234384536743, + "kl": 0.047919515520334244, + "learning_rate": 2.0901061576695752e-07, + "loss": 0.0, + "reward": 0.6428571939468384, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 694 + }, + { + "completion_length": 182.6428680419922, + "epoch": 0.48635409377186845, + "grad_norm": 0.007984691299498081, + "kl": 0.05330199375748634, + "learning_rate": 2.0857134974624557e-07, + "loss": 0.0001, + "reward": 0.7142857313156128, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.7142857313156128, + "step": 695 + }, + { + "completion_length": 199.00001525878906, + "epoch": 0.48705388383484954, + "grad_norm": 1.6554975509643555, + "kl": 0.03912676125764847, + "learning_rate": 2.0813204229845298e-07, + "loss": 0.0, + "reward": 0.785714328289032, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.785714328289032, + "step": 696 + }, + { + "completion_length": 170.6428680419922, + "epoch": 0.48775367389783064, + "grad_norm": 2.764085054397583, + "kl": 0.06136271730065346, + "learning_rate": 2.0769269554684228e-07, + "loss": 0.0001, + "reward": 0.6428571939468384, + "reward_std": 0.30304574966430664, + "rewards/check_gptzero_func": 0.6428571939468384, + "step": 697 + }, + { + "completion_length": 209.6428680419922, + "epoch": 0.48845346396081174, + "grad_norm": 1.0625824928283691, + "kl": 0.03584383800625801, + "learning_rate": 2.0725331161486577e-07, + "loss": 0.0, + "reward": 0.9285714626312256, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.9285714626312256, + "step": 698 + }, + { + "completion_length": 203.07144165039062, + "epoch": 0.48915325402379284, + "grad_norm": 0.005635551642626524, + "kl": 0.03456791117787361, + "learning_rate": 2.068138926261557e-07, + "loss": 0.0, + "reward": 0.5714285969734192, + "reward_std": 0.0, + "rewards/check_gptzero_func": 0.5714285969734192, + "step": 699 + }, + { + "completion_length": 155.07144165039062, + "epoch": 0.489853044086774, + "grad_norm": 1.5510985851287842, + "kl": 0.06770531088113785, + "learning_rate": 2.063744407045134e-07, + "loss": 0.0001, + "reward": 0.9285714626312256, + "reward_std": 0.10101525485515594, + "rewards/check_gptzero_func": 0.9285714626312256, + "step": 700 + } + ], + "logging_steps": 1, + "max_steps": 1429, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}