Qwen-2.5-Base-7B-gen8-math3to5_olympiads_aime-ghpo-cold0-hint50-prompt1-redonum-test / trainer_state.json
{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9954198473282443,
  "eval_steps": 200000,
  "global_step": 163,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "clip_ratio": 0.0,
      "completion_length": 567.619441986084,
      "epoch": 0.0061068702290076335,
      "grad_norm": 0.2135867174294605,
      "learning_rate": 5.88235294117647e-08,
      "loss": 0.0031,
      "num_tokens": 743987.0,
      "reward": 0.07700893143191934,
      "reward_std": 0.1299287434667349,
      "rewards/accuracy_reward": 0.06919642793945968,
      "rewards/format_reward": 0.01562500058207661,
      "step": 1
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 588.1579504013062,
      "epoch": 0.030534351145038167,
      "grad_norm": 0.3343295200773406,
      "learning_rate": 2.941176470588235e-07,
      "loss": -0.0019,
      "num_tokens": 3769305.0,
      "reward": 0.0671037980064284,
      "reward_std": 0.12649992539081722,
      "rewards/accuracy_reward": 0.05803571452270262,
      "rewards/format_reward": 0.018136161408619955,
      "step": 5
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 588.1192253112793,
      "epoch": 0.061068702290076333,
      "grad_norm": 3.974587027805743,
      "learning_rate": 5.88235294117647e-07,
      "loss": 0.004,
      "num_tokens": 7571663.0,
      "reward": 0.07087053959257901,
      "reward_std": 0.12776878643780948,
      "rewards/accuracy_reward": 0.05870535783469677,
      "rewards/format_reward": 0.02433035774156451,
      "step": 10
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 606.0297157287598,
      "epoch": 0.0916030534351145,
      "grad_norm": 0.495391151906653,
      "learning_rate": 8.823529411764705e-07,
      "loss": 0.0015,
      "num_tokens": 11421588.0,
      "reward": 0.09140625372529029,
      "reward_std": 0.1608576664701104,
      "rewards/accuracy_reward": 0.06540178635623306,
      "rewards/format_reward": 0.05200892963912338,
      "step": 15
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 603.9748046875,
      "epoch": 0.12213740458015267,
      "grad_norm": 0.40463318371067986,
      "learning_rate": 9.989585804326962e-07,
      "loss": -0.0012,
      "num_tokens": 15290931.0,
      "reward": 0.20602679532021284,
      "reward_std": 0.26069728564471006,
      "rewards/accuracy_reward": 0.08125000051222742,
      "rewards/format_reward": 0.24955357071012257,
      "step": 20
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 637.4477951049805,
      "epoch": 0.15267175572519084,
      "grad_norm": 0.2621022081219828,
      "learning_rate": 9.926100533780304e-07,
      "loss": 0.0092,
      "num_tokens": 19308993.0,
      "reward": 0.4613839514553547,
      "reward_std": 0.29145158380270003,
      "rewards/accuracy_reward": 0.09285714323632419,
      "rewards/format_reward": 0.737053570151329,
      "step": 25
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 643.634407043457,
      "epoch": 0.183206106870229,
      "grad_norm": 0.6637031031614584,
      "learning_rate": 9.805648919361503e-07,
      "loss": 0.0201,
      "num_tokens": 23260675.0,
      "reward": 0.607031274586916,
      "reward_std": 0.22586645130068064,
      "rewards/accuracy_reward": 0.1455357144586742,
      "rewards/format_reward": 0.9229910746216774,
      "step": 30
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 719.7797180175781,
      "epoch": 0.21374045801526717,
      "grad_norm": 0.12398978901271052,
      "learning_rate": 9.62962388596925e-07,
      "loss": 0.0393,
      "num_tokens": 27603904.0,
      "reward": 0.6875000298023224,
      "reward_std": 0.2122076230123639,
      "rewards/accuracy_reward": 0.2053571424447,
      "rewards/format_reward": 0.9642857119441033,
      "step": 35
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 754.8406555175782,
      "epoch": 0.24427480916030533,
      "grad_norm": 0.11716820534931413,
      "learning_rate": 9.400061019867678e-07,
      "loss": 0.0414,
      "num_tokens": 32059078.0,
      "reward": 0.6993303894996643,
      "reward_std": 0.18653424717485906,
      "rewards/accuracy_reward": 0.21361607229337096,
      "rewards/format_reward": 0.9714285716414451,
      "step": 40
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 752.6609725952148,
      "epoch": 0.2748091603053435,
      "grad_norm": 0.1366862152370541,
      "learning_rate": 9.11961502878777e-07,
      "loss": 0.0295,
      "num_tokens": 36473463.0,
      "reward": 0.7407366394996643,
      "reward_std": 0.16629975400865077,
      "rewards/accuracy_reward": 0.24955357071012257,
      "rewards/format_reward": 0.9823660656809807,
      "step": 45
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 762.0527130126953,
      "epoch": 0.3053435114503817,
      "grad_norm": 0.11417326322107703,
      "learning_rate": 8.791529042392812e-07,
      "loss": 0.0308,
      "num_tokens": 40938643.0,
      "reward": 0.7233259305357933,
      "reward_std": 0.16576984385028481,
      "rewards/accuracy_reward": 0.23281249962747097,
      "rewards/format_reward": 0.9810267806053161,
      "step": 50
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 764.9165496826172,
      "epoch": 0.33587786259541985,
      "grad_norm": 0.10399028120391927,
      "learning_rate": 8.419597108123053e-07,
      "loss": 0.0269,
      "num_tokens": 45439493.0,
      "reward": 0.729910746216774,
      "reward_std": 0.16336991311982274,
      "rewards/accuracy_reward": 0.2368303578812629,
      "rewards/format_reward": 0.9861607074737548,
      "step": 55
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 750.571240234375,
      "epoch": 0.366412213740458,
      "grad_norm": 0.10084790893922295,
      "learning_rate": 8.008120316124611e-07,
      "loss": 0.0243,
      "num_tokens": 49869428.0,
      "reward": 0.7180803894996644,
      "reward_std": 0.1501687964424491,
      "rewards/accuracy_reward": 0.22611607052385807,
      "rewards/format_reward": 0.9839285641908646,
      "step": 60
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 788.064323425293,
      "epoch": 0.3969465648854962,
      "grad_norm": 0.0777401486625089,
      "learning_rate": 7.561857060642119e-07,
      "loss": 0.0195,
      "num_tokens": 54497556.0,
      "reward": 0.7145089581608772,
      "reward_std": 0.16678392123430968,
      "rewards/accuracy_reward": 0.22120535522699356,
      "rewards/format_reward": 0.9866071388125419,
      "step": 65
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 759.6870880126953,
      "epoch": 0.42748091603053434,
      "grad_norm": 0.08469355022666704,
      "learning_rate": 7.085968013061584e-07,
      "loss": 0.0277,
      "num_tokens": 58963650.0,
      "reward": 0.7053571730852127,
      "reward_std": 0.13858543820679187,
      "rewards/accuracy_reward": 0.21272321385331452,
      "rewards/format_reward": 0.9852678492665291,
      "step": 70
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 784.3569534301757,
      "epoch": 0.4580152671755725,
      "grad_norm": 0.09089115066810906,
      "learning_rate": 6.585956442945531e-07,
      "loss": 0.022,
      "num_tokens": 63555393.0,
      "reward": 0.7090402141213417,
      "reward_std": 0.15061827255412935,
      "rewards/accuracy_reward": 0.21651785587891936,
      "rewards/format_reward": 0.9850446343421936,
      "step": 75
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 797.2703521728515,
      "epoch": 0.48854961832061067,
      "grad_norm": 0.09002554108722542,
      "learning_rate": 6.06760457719898e-07,
      "loss": 0.0238,
      "num_tokens": 68216500.0,
      "reward": 0.7666295006871223,
      "reward_std": 0.16813623085618018,
      "rewards/accuracy_reward": 0.27165178642608223,
      "rewards/format_reward": 0.9899553492665291,
      "step": 80
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 761.4013763427735,
      "epoch": 0.5190839694656488,
      "grad_norm": 0.08498851686715876,
      "learning_rate": 5.536906733320815e-07,
      "loss": 0.0158,
      "num_tokens": 72721850.0,
      "reward": 0.7462053880095482,
      "reward_std": 0.1457503356039524,
      "rewards/accuracy_reward": 0.2502232126891613,
      "rewards/format_reward": 0.9919642806053162,
      "step": 85
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 766.9694580078125,
      "epoch": 0.549618320610687,
      "grad_norm": 0.07561608030311687,
      "learning_rate": 5e-07,
      "loss": 0.024,
      "num_tokens": 77220857.0,
      "reward": 0.739285746216774,
      "reward_std": 0.14479873944073915,
      "rewards/accuracy_reward": 0.2448660720139742,
      "rewards/format_reward": 0.9888392820954323,
      "step": 90
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 760.7444534301758,
      "epoch": 0.5801526717557252,
      "grad_norm": 0.08175108965516903,
      "learning_rate": 4.463093266679185e-07,
      "loss": 0.0241,
      "num_tokens": 81689720.0,
      "reward": 0.7234375298023223,
      "reward_std": 0.1551271199248731,
      "rewards/accuracy_reward": 0.22790178465656935,
      "rewards/format_reward": 0.9910714194178581,
      "step": 95
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 759.4685607910156,
      "epoch": 0.6106870229007634,
      "grad_norm": 0.09009365686861091,
      "learning_rate": 3.932395422801019e-07,
      "loss": 0.0283,
      "num_tokens": 86136451.0,
      "reward": 0.7452009245753288,
      "reward_std": 0.1618154514580965,
      "rewards/accuracy_reward": 0.2524553562514484,
      "rewards/format_reward": 0.9854910641908645,
      "step": 100
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 739.8957916259766,
      "epoch": 0.6412213740458015,
      "grad_norm": 0.09858205620896988,
      "learning_rate": 3.41404355705447e-07,
      "loss": 0.0175,
      "num_tokens": 90456928.0,
      "reward": 0.7717634305357933,
      "reward_std": 0.1629006579518318,
      "rewards/accuracy_reward": 0.2754464283585548,
      "rewards/format_reward": 0.9926339209079742,
      "step": 105
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 779.8815078735352,
      "epoch": 0.6717557251908397,
      "grad_norm": 0.08118289189251325,
      "learning_rate": 2.914031986938417e-07,
      "loss": 0.019,
      "num_tokens": 95039765.0,
      "reward": 0.7184152096509934,
      "reward_std": 0.15354990400373936,
      "rewards/accuracy_reward": 0.2238839288474992,
      "rewards/format_reward": 0.9890624955296516,
      "step": 110
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 743.4281570434571,
      "epoch": 0.7022900763358778,
      "grad_norm": 0.08942937354761228,
      "learning_rate": 2.4381429393578815e-07,
      "loss": 0.0134,
      "num_tokens": 99422555.0,
      "reward": 0.7406250327825546,
      "reward_std": 0.15929818488657474,
      "rewards/accuracy_reward": 0.24397321371361613,
      "rewards/format_reward": 0.9933035641908645,
      "step": 115
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 755.4859725952149,
      "epoch": 0.732824427480916,
      "grad_norm": 0.08614513315269835,
      "learning_rate": 1.991879683875386e-07,
      "loss": 0.0216,
      "num_tokens": 103884636.0,
      "reward": 0.7185268118977547,
      "reward_std": 0.14079238111153244,
      "rewards/accuracy_reward": 0.22388392696157097,
      "rewards/format_reward": 0.9892857059836387,
      "step": 120
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 738.5589630126954,
      "epoch": 0.7633587786259542,
      "grad_norm": 0.0987941233877778,
      "learning_rate": 1.5804028918769485e-07,
      "loss": 0.0177,
      "num_tokens": 108269772.0,
      "reward": 0.7502232447266579,
      "reward_std": 0.1488088957965374,
      "rewards/accuracy_reward": 0.2546874986961484,
      "rewards/format_reward": 0.9910714223980903,
      "step": 125
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 718.1944564819336,
      "epoch": 0.7938931297709924,
      "grad_norm": 0.08957435241917934,
      "learning_rate": 1.2084709576071883e-07,
      "loss": 0.0164,
      "num_tokens": 112486547.0,
      "reward": 0.7435268193483353,
      "reward_std": 0.15864104311913252,
      "rewards/accuracy_reward": 0.24799107126891612,
      "rewards/format_reward": 0.9910714194178581,
      "step": 130
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 771.4886535644531,
      "epoch": 0.8244274809160306,
      "grad_norm": 0.10033485818985768,
      "learning_rate": 8.803849712122291e-08,
      "loss": 0.0191,
      "num_tokens": 117017984.0,
      "reward": 0.7385044932365418,
      "reward_std": 0.15429062955081463,
      "rewards/accuracy_reward": 0.24508928610011935,
      "rewards/format_reward": 0.9868303492665291,
      "step": 135
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 753.3828475952148,
      "epoch": 0.8549618320610687,
      "grad_norm": 0.08591161076983043,
      "learning_rate": 5.999389801323218e-08,
      "loss": 0.017,
      "num_tokens": 121422939.0,
      "reward": 0.7706473574042321,
      "reward_std": 0.16626517940312624,
      "rewards/accuracy_reward": 0.2743303582072258,
      "rewards/format_reward": 0.9926339253783226,
      "step": 140
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 737.1861953735352,
      "epoch": 0.8854961832061069,
      "grad_norm": 0.08133023369347758,
      "learning_rate": 3.7037611403075096e-08,
      "loss": 0.0211,
      "num_tokens": 125792325.0,
      "reward": 0.7737723588943481,
      "reward_std": 0.14874637452885509,
      "rewards/accuracy_reward": 0.2774553569033742,
      "rewards/format_reward": 0.9926339238882065,
      "step": 145
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 719.9216812133789,
      "epoch": 0.916030534351145,
      "grad_norm": 0.09062176860429237,
      "learning_rate": 1.943510806384968e-08,
      "loss": 0.0223,
      "num_tokens": 130100454.0,
      "reward": 0.7459821745753288,
      "reward_std": 0.1444489900022745,
      "rewards/accuracy_reward": 0.24977678433060646,
      "rewards/format_reward": 0.9924107134342194,
      "step": 150
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 725.9875335693359,
      "epoch": 0.9465648854961832,
      "grad_norm": 0.0910938816922007,
      "learning_rate": 7.389946621969678e-09,
      "loss": 0.0111,
      "num_tokens": 134418750.0,
      "reward": 0.7150669932365418,
      "reward_std": 0.145760334469378,
      "rewards/accuracy_reward": 0.21897321371361614,
      "rewards/format_reward": 0.9921874910593033,
      "step": 155
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 729.7480224609375,
      "epoch": 0.9770992366412213,
      "grad_norm": 0.0976254681849543,
      "learning_rate": 1.0414195673039138e-09,
      "loss": 0.0201,
      "num_tokens": 138738941.0,
      "reward": 0.7399553954601288,
      "reward_std": 0.13362135970965028,
      "rewards/accuracy_reward": 0.24397321399301292,
      "rewards/format_reward": 0.9919642806053162,
      "step": 160
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 757.2314198811849,
      "epoch": 0.9954198473282443,
      "num_tokens": 141385215.0,
      "reward": 0.7395833656191826,
      "reward_std": 0.1369063208500544,
      "rewards/accuracy_reward": 0.2447916651920726,
      "rewards/format_reward": 0.9895833333333334,
      "step": 163,
      "total_flos": 0.0,
      "train_loss": 0.019440959097726123,
      "train_runtime": 57675.0727,
      "train_samples_per_second": 0.318,
      "train_steps_per_second": 0.003
    }
  ],
  "logging_steps": 5,
  "max_steps": 163,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}
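The state above can be inspected programmatically. The following is a minimal sketch, assuming the file is saved locally as "trainer_state.json"; it only uses keys that actually appear in every log_history entry (step, reward, rewards/accuracy_reward, rewards/format_reward) and reads the run-level summary fields from the final entry.

# sketch.py -- illustrative only; file path is an assumption
import json

with open("trainer_state.json") as f:
    state = json.load(f)

# Print the reward curve per logged step.
for entry in state["log_history"]:
    print(
        f"step {entry['step']:>3}: "
        f"reward={entry['reward']:.3f} "
        f"accuracy={entry['rewards/accuracy_reward']:.3f} "
        f"format={entry['rewards/format_reward']:.3f}"
    )

# The final entry also carries the overall training summary.
final = state["log_history"][-1]
print("train_loss:", final.get("train_loss"))
print("train_runtime (s):", final.get("train_runtime"))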