care-chinese-gemma2-9b / trainer_state.json
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.8085106382978724,
"eval_steps": 500,
"global_step": 33,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0851063829787234,
"grad_norm": 0.0,
"learning_rate": 0.0,
"logits/chosen": -1.8645445108413696,
"logits/rejected": 14.429821968078613,
"logps/chosen": -346.043701171875,
"logps/rejected": -212.6157684326172,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.1702127659574468,
"grad_norm": 0.0,
"learning_rate": 0.0,
"logits/chosen": 0.5981572866439819,
"logits/rejected": 11.581156730651855,
"logps/chosen": -301.7901306152344,
"logps/rejected": -188.81680297851562,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 2
},
{
"epoch": 0.2553191489361702,
"grad_norm": 77.4799575805664,
"learning_rate": 1e-07,
"logits/chosen": -0.6666683554649353,
"logits/rejected": 13.3030424118042,
"logps/chosen": -244.64401245117188,
"logps/rejected": -135.6005096435547,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 3
},
{
"epoch": 0.3404255319148936,
"grad_norm": 75.18358612060547,
"learning_rate": 2e-07,
"logits/chosen": -0.6529165506362915,
"logits/rejected": 12.279073715209961,
"logps/chosen": -260.230224609375,
"logps/rejected": -151.9573974609375,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 4
},
{
"epoch": 0.425531914893617,
"grad_norm": 80.90792846679688,
"learning_rate": 3e-07,
"logits/chosen": 0.008319228887557983,
"logits/rejected": 17.30872917175293,
"logps/chosen": -284.0721435546875,
"logps/rejected": -131.707275390625,
"loss": 0.6899,
"rewards/accuracies": 0.90625,
"rewards/chosen": 0.008991742506623268,
"rewards/margins": 0.007281172554939985,
"rewards/rejected": 0.001710569835267961,
"step": 5
},
{
"epoch": 0.5106382978723404,
"grad_norm": 70.98322296142578,
"learning_rate": 4e-07,
"logits/chosen": 1.6513853073120117,
"logits/rejected": 10.607856750488281,
"logps/chosen": -241.02496337890625,
"logps/rejected": -187.40670776367188,
"loss": 0.6637,
"rewards/accuracies": 0.75,
"rewards/chosen": 0.08284933865070343,
"rewards/margins": 0.04663487523794174,
"rewards/rejected": 0.03621446341276169,
"step": 6
},
{
"epoch": 0.5957446808510638,
"grad_norm": 64.64167022705078,
"learning_rate": 5e-07,
"logits/chosen": -0.655532717704773,
"logits/rejected": 13.186487197875977,
"logps/chosen": -312.20770263671875,
"logps/rejected": -185.97059631347656,
"loss": 0.5792,
"rewards/accuracies": 0.90625,
"rewards/chosen": 0.3591811954975128,
"rewards/margins": 0.2757205367088318,
"rewards/rejected": 0.08346068859100342,
"step": 7
},
{
"epoch": 0.6808510638297872,
"grad_norm": 50.39228820800781,
"learning_rate": 4.821428571428571e-07,
"logits/chosen": -0.6116840243339539,
"logits/rejected": 14.426715850830078,
"logps/chosen": -278.035888671875,
"logps/rejected": -141.1471710205078,
"loss": 0.4993,
"rewards/accuracies": 0.84375,
"rewards/chosen": 0.6650260090827942,
"rewards/margins": 0.4626970589160919,
"rewards/rejected": 0.20232899487018585,
"step": 8
},
{
"epoch": 0.7659574468085106,
"grad_norm": 36.09687042236328,
"learning_rate": 4.6428571428571427e-07,
"logits/chosen": 1.258104681968689,
"logits/rejected": 13.34419059753418,
"logps/chosen": -207.1935577392578,
"logps/rejected": -125.97917938232422,
"loss": 0.3753,
"rewards/accuracies": 0.9375,
"rewards/chosen": 1.0702375173568726,
"rewards/margins": 0.9072185158729553,
"rewards/rejected": 0.1630191206932068,
"step": 9
},
{
"epoch": 0.851063829787234,
"grad_norm": 31.1262264251709,
"learning_rate": 4.464285714285714e-07,
"logits/chosen": 1.3978009223937988,
"logits/rejected": 14.55895709991455,
"logps/chosen": -271.84869384765625,
"logps/rejected": -154.90167236328125,
"loss": 0.3268,
"rewards/accuracies": 0.8125,
"rewards/chosen": 1.6433621644973755,
"rewards/margins": 1.044425368309021,
"rewards/rejected": 0.5989368557929993,
"step": 10
},
{
"epoch": 0.9361702127659575,
"grad_norm": 36.019447326660156,
"learning_rate": 4.285714285714285e-07,
"logits/chosen": 2.2715587615966797,
"logits/rejected": 11.973756790161133,
"logps/chosen": -254.08721923828125,
"logps/rejected": -192.8157501220703,
"loss": 0.3231,
"rewards/accuracies": 0.75,
"rewards/chosen": 2.2909791469573975,
"rewards/margins": 1.358994483947754,
"rewards/rejected": 0.931984543800354,
"step": 11
},
{
"epoch": 1.0212765957446808,
"grad_norm": 34.85441207885742,
"learning_rate": 4.1071428571428566e-07,
"logits/chosen": 2.2988595962524414,
"logits/rejected": 11.726805686950684,
"logps/chosen": -290.65106201171875,
"logps/rejected": -231.5535125732422,
"loss": 0.3005,
"rewards/accuracies": 0.84375,
"rewards/chosen": 2.6145758628845215,
"rewards/margins": 2.101471424102783,
"rewards/rejected": 0.5131043195724487,
"step": 12
},
{
"epoch": 1.1063829787234043,
"grad_norm": 15.43303394317627,
"learning_rate": 3.928571428571428e-07,
"logits/chosen": 2.161435127258301,
"logits/rejected": 15.173026084899902,
"logps/chosen": -285.2391662597656,
"logps/rejected": -175.35610961914062,
"loss": 0.1156,
"rewards/accuracies": 0.96875,
"rewards/chosen": 3.8329055309295654,
"rewards/margins": 3.6420764923095703,
"rewards/rejected": 0.19082875549793243,
"step": 13
},
{
"epoch": 1.1914893617021276,
"grad_norm": 22.640640258789062,
"learning_rate": 3.75e-07,
"logits/chosen": 0.34790876507759094,
"logits/rejected": 16.288211822509766,
"logps/chosen": -258.007080078125,
"logps/rejected": -125.82489013671875,
"loss": 0.1923,
"rewards/accuracies": 0.90625,
"rewards/chosen": 3.476907968521118,
"rewards/margins": 3.2935471534729004,
"rewards/rejected": 0.18336114287376404,
"step": 14
},
{
"epoch": 1.2765957446808511,
"grad_norm": 17.350461959838867,
"learning_rate": 3.5714285714285716e-07,
"logits/chosen": 0.2184600532054901,
"logits/rejected": 12.73766040802002,
"logps/chosen": -201.4739532470703,
"logps/rejected": -120.93484497070312,
"loss": 0.1553,
"rewards/accuracies": 0.9375,
"rewards/chosen": 3.4015889167785645,
"rewards/margins": 3.4224205017089844,
"rewards/rejected": -0.020831629633903503,
"step": 15
},
{
"epoch": 1.3617021276595744,
"grad_norm": 10.290063858032227,
"learning_rate": 3.392857142857143e-07,
"logits/chosen": 1.29380464553833,
"logits/rejected": 17.044889450073242,
"logps/chosen": -255.9779052734375,
"logps/rejected": -129.52340698242188,
"loss": 0.0961,
"rewards/accuracies": 0.9375,
"rewards/chosen": 4.283539295196533,
"rewards/margins": 4.322422504425049,
"rewards/rejected": -0.03888271749019623,
"step": 16
},
{
"epoch": 1.4468085106382977,
"grad_norm": 12.951630592346191,
"learning_rate": 3.2142857142857145e-07,
"logits/chosen": 0.8126751780509949,
"logits/rejected": 14.440966606140137,
"logps/chosen": -242.29527282714844,
"logps/rejected": -145.05775451660156,
"loss": 0.0923,
"rewards/accuracies": 0.96875,
"rewards/chosen": 3.8756561279296875,
"rewards/margins": 4.231058597564697,
"rewards/rejected": -0.35540255904197693,
"step": 17
},
{
"epoch": 1.5319148936170213,
"grad_norm": 19.92337417602539,
"learning_rate": 3.0357142857142855e-07,
"logits/chosen": 1.3031485080718994,
"logits/rejected": 13.797933578491211,
"logps/chosen": -248.54811096191406,
"logps/rejected": -165.89808654785156,
"loss": 0.0929,
"rewards/accuracies": 1.0,
"rewards/chosen": 4.139030456542969,
"rewards/margins": 4.645418167114258,
"rewards/rejected": -0.5063877701759338,
"step": 18
},
{
"epoch": 1.6170212765957448,
"grad_norm": 9.488024711608887,
"learning_rate": 2.857142857142857e-07,
"logits/chosen": 2.991806983947754,
"logits/rejected": 14.36280632019043,
"logps/chosen": -278.69769287109375,
"logps/rejected": -145.9281463623047,
"loss": 0.0783,
"rewards/accuracies": 0.9375,
"rewards/chosen": 4.406310558319092,
"rewards/margins": 5.150607109069824,
"rewards/rejected": -0.7442967295646667,
"step": 19
},
{
"epoch": 1.702127659574468,
"grad_norm": 12.83969497680664,
"learning_rate": 2.6785714285714284e-07,
"logits/chosen": 3.6287827491760254,
"logits/rejected": 12.436114311218262,
"logps/chosen": -210.47592163085938,
"logps/rejected": -189.6756134033203,
"loss": 0.083,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.7564351558685303,
"rewards/margins": 4.538413047790527,
"rewards/rejected": -0.7819780707359314,
"step": 20
},
{
"epoch": 1.7872340425531914,
"grad_norm": 17.395404815673828,
"learning_rate": 2.5e-07,
"logits/chosen": 4.2808837890625,
"logits/rejected": 13.777887344360352,
"logps/chosen": -271.89227294921875,
"logps/rejected": -207.44837951660156,
"loss": 0.1051,
"rewards/accuracies": 0.96875,
"rewards/chosen": 4.251194477081299,
"rewards/margins": 5.237154006958008,
"rewards/rejected": -0.9859597682952881,
"step": 21
},
{
"epoch": 1.872340425531915,
"grad_norm": 23.122020721435547,
"learning_rate": 2.3214285714285714e-07,
"logits/chosen": 1.1441445350646973,
"logits/rejected": 14.379843711853027,
"logps/chosen": -235.24493408203125,
"logps/rejected": -194.19224548339844,
"loss": 0.1211,
"rewards/accuracies": 0.9375,
"rewards/chosen": 4.572199821472168,
"rewards/margins": 5.295849800109863,
"rewards/rejected": -0.7236496806144714,
"step": 22
},
{
"epoch": 1.9574468085106385,
"grad_norm": 13.432173728942871,
"learning_rate": 2.1428571428571426e-07,
"logits/chosen": -0.5294728875160217,
"logits/rejected": 16.71356201171875,
"logps/chosen": -219.78480529785156,
"logps/rejected": -111.86544036865234,
"loss": 0.1006,
"rewards/accuracies": 0.9375,
"rewards/chosen": 3.635490894317627,
"rewards/margins": 4.1884026527404785,
"rewards/rejected": -0.5529115200042725,
"step": 23
},
{
"epoch": 2.0425531914893615,
"grad_norm": 8.677014350891113,
"learning_rate": 1.964285714285714e-07,
"logits/chosen": 0.39843907952308655,
"logits/rejected": 13.533981323242188,
"logps/chosen": -216.69137573242188,
"logps/rejected": -143.2519989013672,
"loss": 0.056,
"rewards/accuracies": 1.0,
"rewards/chosen": 4.103562355041504,
"rewards/margins": 4.519499778747559,
"rewards/rejected": -0.4159368574619293,
"step": 24
},
{
"epoch": 2.127659574468085,
"grad_norm": 3.03104305267334,
"learning_rate": 1.7857142857142858e-07,
"logits/chosen": 1.5668433904647827,
"logits/rejected": 14.729446411132812,
"logps/chosen": -191.61404418945312,
"logps/rejected": -183.37631225585938,
"loss": 0.03,
"rewards/accuracies": 1.0,
"rewards/chosen": 4.106316566467285,
"rewards/margins": 5.793395042419434,
"rewards/rejected": -1.6870781183242798,
"step": 25
},
{
"epoch": 2.2127659574468086,
"grad_norm": 3.136233329772949,
"learning_rate": 1.6071428571428573e-07,
"logits/chosen": 0.05481068789958954,
"logits/rejected": 14.857564926147461,
"logps/chosen": -266.8812561035156,
"logps/rejected": -150.13543701171875,
"loss": 0.0235,
"rewards/accuracies": 1.0,
"rewards/chosen": 4.614926338195801,
"rewards/margins": 5.615174293518066,
"rewards/rejected": -1.0002480745315552,
"step": 26
},
{
"epoch": 2.297872340425532,
"grad_norm": 2.425645112991333,
"learning_rate": 1.4285714285714285e-07,
"logits/chosen": 1.4185882806777954,
"logits/rejected": 12.52522087097168,
"logps/chosen": -257.33148193359375,
"logps/rejected": -225.2210235595703,
"loss": 0.0226,
"rewards/accuracies": 1.0,
"rewards/chosen": 4.434384822845459,
"rewards/margins": 5.778439044952393,
"rewards/rejected": -1.3440542221069336,
"step": 27
},
{
"epoch": 2.382978723404255,
"grad_norm": 5.070924282073975,
"learning_rate": 1.25e-07,
"logits/chosen": 1.7797931432724,
"logits/rejected": 12.562594413757324,
"logps/chosen": -289.022216796875,
"logps/rejected": -247.23553466796875,
"loss": 0.0306,
"rewards/accuracies": 1.0,
"rewards/chosen": 4.724697113037109,
"rewards/margins": 6.377914905548096,
"rewards/rejected": -1.6532176733016968,
"step": 28
},
{
"epoch": 2.4680851063829787,
"grad_norm": 2.1009552478790283,
"learning_rate": 1.0714285714285713e-07,
"logits/chosen": -0.9775732755661011,
"logits/rejected": 14.321802139282227,
"logps/chosen": -209.55625915527344,
"logps/rejected": -115.63282775878906,
"loss": 0.0191,
"rewards/accuracies": 1.0,
"rewards/chosen": 4.2973175048828125,
"rewards/margins": 5.245972156524658,
"rewards/rejected": -0.9486544132232666,
"step": 29
},
{
"epoch": 2.5531914893617023,
"grad_norm": 2.554861545562744,
"learning_rate": 8.928571428571429e-08,
"logits/chosen": 2.66330623626709,
"logits/rejected": 11.6483154296875,
"logps/chosen": -243.45645141601562,
"logps/rejected": -232.2058563232422,
"loss": 0.0221,
"rewards/accuracies": 1.0,
"rewards/chosen": 4.154801368713379,
"rewards/margins": 6.131631851196289,
"rewards/rejected": -1.9768304824829102,
"step": 30
},
{
"epoch": 2.6382978723404253,
"grad_norm": 2.7131459712982178,
"learning_rate": 7.142857142857142e-08,
"logits/chosen": -0.194177508354187,
"logits/rejected": 13.815250396728516,
"logps/chosen": -218.8939208984375,
"logps/rejected": -164.52902221679688,
"loss": 0.0205,
"rewards/accuracies": 1.0,
"rewards/chosen": 4.5105462074279785,
"rewards/margins": 5.92357873916626,
"rewards/rejected": -1.413031816482544,
"step": 31
},
{
"epoch": 2.723404255319149,
"grad_norm": 3.911942481994629,
"learning_rate": 5.3571428571428564e-08,
"logits/chosen": -0.5970292091369629,
"logits/rejected": 16.974449157714844,
"logps/chosen": -247.49411010742188,
"logps/rejected": -128.2796630859375,
"loss": 0.0245,
"rewards/accuracies": 1.0,
"rewards/chosen": 4.004292011260986,
"rewards/margins": 5.12349271774292,
"rewards/rejected": -1.1192007064819336,
"step": 32
},
{
"epoch": 2.8085106382978724,
"grad_norm": 3.6512134075164795,
"learning_rate": 3.571428571428571e-08,
"logits/chosen": -1.6241520643234253,
"logits/rejected": 12.453986167907715,
"logps/chosen": -295.5976867675781,
"logps/rejected": -223.0675811767578,
"loss": 0.0221,
"rewards/accuracies": 1.0,
"rewards/chosen": 5.045155048370361,
"rewards/margins": 6.366570472717285,
"rewards/rejected": -1.321415901184082,
"step": 33
}
],
"logging_steps": 1.0,
"max_steps": 33,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 9958223708160.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}
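
The log_history above records one entry per optimizer step (33 steps over ~2.8 epochs) with DPO-style preference metrics (loss, rewards/margins, rewards/accuracies). A minimal sketch for inspecting it, assuming the JSON is saved locally as trainer_state.json; the filename and script are illustrative and not part of this upload:

# Load the trainer state and print a per-step summary of the logged metrics.
import json

with open("trainer_state.json") as f:
    state = json.load(f)

# Each log_history entry corresponds to one logged training step.
for entry in state["log_history"]:
    print(
        f"step {entry['step']:>2} | epoch {entry['epoch']:.2f} | "
        f"loss {entry['loss']:.4f} | "
        f"reward margin {entry['rewards/margins']:.3f} | "
        f"accuracy {entry['rewards/accuracies']:.2f}"
    )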