|
{ |
|
"best_metric": 0.7947742682321812, |
|
"best_model_checkpoint": "./XLM-V_64-multi-outputs/checkpoint-15000", |
|
"epoch": 14.8619957537155, |
|
"eval_steps": 1000, |
|
"global_step": 21000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.7077140835102619, |
|
"grad_norm": 1.493615746498108, |
|
"learning_rate": 9.433962264150943e-07, |
|
"loss": 0.6936, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.7077140835102619, |
|
"eval_accuracy": 0.5659663447177138, |
|
"eval_f1": 0.6995657867530498, |
|
"eval_loss": 0.682445764541626, |
|
"eval_precision": 0.5400659785037778, |
|
"eval_recall": 0.9927621283255086, |
|
"eval_runtime": 57.8197, |
|
"eval_samples_per_second": 173.695, |
|
"eval_steps_per_second": 2.715, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.4154281670205238, |
|
"grad_norm": 3.4982151985168457, |
|
"learning_rate": 1.8867924528301887e-06, |
|
"loss": 0.672, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.4154281670205238, |
|
"eval_accuracy": 0.6204321417903017, |
|
"eval_f1": 0.7176714560805807, |
|
"eval_loss": 0.6340225338935852, |
|
"eval_precision": 0.5774731823599524, |
|
"eval_recall": 0.9477699530516432, |
|
"eval_runtime": 57.5505, |
|
"eval_samples_per_second": 174.508, |
|
"eval_steps_per_second": 2.728, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.1231422505307855, |
|
"grad_norm": 12.846685409545898, |
|
"learning_rate": 1.907732634338139e-06, |
|
"loss": 0.6537, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.1231422505307855, |
|
"eval_accuracy": 0.6434332370805537, |
|
"eval_f1": 0.7224676431837557, |
|
"eval_loss": 0.6225757002830505, |
|
"eval_precision": 0.5982543960980619, |
|
"eval_recall": 0.9117762128325508, |
|
"eval_runtime": 57.5731, |
|
"eval_samples_per_second": 174.439, |
|
"eval_steps_per_second": 2.727, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.8308563340410475, |
|
"grad_norm": 4.933294296264648, |
|
"learning_rate": 1.8028833551769331e-06, |
|
"loss": 0.6499, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.8308563340410475, |
|
"eval_accuracy": 0.6594643034949716, |
|
"eval_f1": 0.743858597962852, |
|
"eval_loss": 0.604403555393219, |
|
"eval_precision": 0.6026699029126213, |
|
"eval_recall": 0.9714397496087637, |
|
"eval_runtime": 57.5924, |
|
"eval_samples_per_second": 174.381, |
|
"eval_steps_per_second": 2.726, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 3.538570417551309, |
|
"grad_norm": 9.257452011108398, |
|
"learning_rate": 1.6980340760157273e-06, |
|
"loss": 0.6389, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 3.538570417551309, |
|
"eval_accuracy": 0.660460021905805, |
|
"eval_f1": 0.6269146608315098, |
|
"eval_loss": 0.6047727465629578, |
|
"eval_precision": 0.7112711022840119, |
|
"eval_recall": 0.5604460093896714, |
|
"eval_runtime": 57.6043, |
|
"eval_samples_per_second": 174.344, |
|
"eval_steps_per_second": 2.725, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 4.246284501061571, |
|
"grad_norm": 8.192702293395996, |
|
"learning_rate": 1.5931847968545215e-06, |
|
"loss": 0.6187, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 4.246284501061571, |
|
"eval_accuracy": 0.6978990341531415, |
|
"eval_f1": 0.7157046476761619, |
|
"eval_loss": 0.5651090741157532, |
|
"eval_precision": 0.6868705035971223, |
|
"eval_recall": 0.7470657276995305, |
|
"eval_runtime": 57.5852, |
|
"eval_samples_per_second": 174.402, |
|
"eval_steps_per_second": 2.726, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 4.953998584571833, |
|
"grad_norm": 49.664493560791016, |
|
"learning_rate": 1.488335517693316e-06, |
|
"loss": 0.5962, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 4.953998584571833, |
|
"eval_accuracy": 0.7021806233197252, |
|
"eval_f1": 0.7534009398961168, |
|
"eval_loss": 0.5552804470062256, |
|
"eval_precision": 0.6511329628046174, |
|
"eval_recall": 0.8937793427230047, |
|
"eval_runtime": 57.4997, |
|
"eval_samples_per_second": 174.662, |
|
"eval_steps_per_second": 2.73, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 5.661712668082095, |
|
"grad_norm": 7.591745376586914, |
|
"learning_rate": 1.38348623853211e-06, |
|
"loss": 0.5716, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 5.661712668082095, |
|
"eval_accuracy": 0.7096485114009758, |
|
"eval_f1": 0.7601184600197434, |
|
"eval_loss": 0.5378949046134949, |
|
"eval_precision": 0.65587734241908, |
|
"eval_recall": 0.903755868544601, |
|
"eval_runtime": 57.5168, |
|
"eval_samples_per_second": 174.61, |
|
"eval_steps_per_second": 2.73, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 6.369426751592357, |
|
"grad_norm": 7.424753189086914, |
|
"learning_rate": 1.2786369593709043e-06, |
|
"loss": 0.531, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 6.369426751592357, |
|
"eval_accuracy": 0.7241860001991437, |
|
"eval_f1": 0.746800731261426, |
|
"eval_loss": 0.4708074629306793, |
|
"eval_precision": 0.7009265614275909, |
|
"eval_recall": 0.7991001564945227, |
|
"eval_runtime": 57.6158, |
|
"eval_samples_per_second": 174.31, |
|
"eval_steps_per_second": 2.725, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 7.077140835102618, |
|
"grad_norm": 9.683096885681152, |
|
"learning_rate": 1.1737876802096983e-06, |
|
"loss": 0.4858, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 7.077140835102618, |
|
"eval_accuracy": 0.7385243453151449, |
|
"eval_f1": 0.7664947536902009, |
|
"eval_loss": 0.44091591238975525, |
|
"eval_precision": 0.7026410172807304, |
|
"eval_recall": 0.843114241001565, |
|
"eval_runtime": 57.5121, |
|
"eval_samples_per_second": 174.624, |
|
"eval_steps_per_second": 2.73, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 7.78485491861288, |
|
"grad_norm": 25.252531051635742, |
|
"learning_rate": 1.0689384010484928e-06, |
|
"loss": 0.4577, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 7.78485491861288, |
|
"eval_accuracy": 0.7428059344817286, |
|
"eval_f1": 0.7718399434678915, |
|
"eval_loss": 0.42118868231773376, |
|
"eval_precision": 0.7036559832501208, |
|
"eval_recall": 0.8546557120500783, |
|
"eval_runtime": 57.4963, |
|
"eval_samples_per_second": 174.672, |
|
"eval_steps_per_second": 2.731, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 8.492569002123142, |
|
"grad_norm": 9.067980766296387, |
|
"learning_rate": 9.64089121887287e-07, |
|
"loss": 0.4404, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 8.492569002123142, |
|
"eval_accuracy": 0.7539579806830629, |
|
"eval_f1": 0.7588091752074183, |
|
"eval_loss": 0.40373048186302185, |
|
"eval_precision": 0.75725696473797, |
|
"eval_recall": 0.7603677621283255, |
|
"eval_runtime": 57.7852, |
|
"eval_samples_per_second": 173.799, |
|
"eval_steps_per_second": 2.717, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 9.200283085633403, |
|
"grad_norm": 6.412005424499512, |
|
"learning_rate": 8.592398427260812e-07, |
|
"loss": 0.42, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 9.200283085633403, |
|
"eval_accuracy": 0.7534601214776461, |
|
"eval_f1": 0.7862938028655274, |
|
"eval_loss": 0.394222617149353, |
|
"eval_precision": 0.7035835650293482, |
|
"eval_recall": 0.8910406885758998, |
|
"eval_runtime": 57.6787, |
|
"eval_samples_per_second": 174.12, |
|
"eval_steps_per_second": 2.722, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 9.907997169143666, |
|
"grad_norm": 9.248723983764648, |
|
"learning_rate": 7.543905635648754e-07, |
|
"loss": 0.4061, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 9.907997169143666, |
|
"eval_accuracy": 0.7524644030668127, |
|
"eval_f1": 0.7793752218672346, |
|
"eval_loss": 0.3883645236492157, |
|
"eval_precision": 0.7132878492527616, |
|
"eval_recall": 0.8589593114241002, |
|
"eval_runtime": 57.5171, |
|
"eval_samples_per_second": 174.609, |
|
"eval_steps_per_second": 2.73, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 10.615711252653927, |
|
"grad_norm": 8.524171829223633, |
|
"learning_rate": 6.495412844036698e-07, |
|
"loss": 0.4007, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 10.615711252653927, |
|
"eval_accuracy": 0.7528626904311461, |
|
"eval_f1": 0.7947742682321812, |
|
"eval_loss": 0.38538095355033875, |
|
"eval_precision": 0.6883414494414208, |
|
"eval_recall": 0.9401408450704225, |
|
"eval_runtime": 57.8102, |
|
"eval_samples_per_second": 173.724, |
|
"eval_steps_per_second": 2.716, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 11.32342533616419, |
|
"grad_norm": 4.774406909942627, |
|
"learning_rate": 5.44692005242464e-07, |
|
"loss": 0.3936, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 11.32342533616419, |
|
"eval_accuracy": 0.7545554117295629, |
|
"eval_f1": 0.793083186434987, |
|
"eval_loss": 0.38386669754981995, |
|
"eval_precision": 0.6946037347448905, |
|
"eval_recall": 0.9241001564945227, |
|
"eval_runtime": 57.8533, |
|
"eval_samples_per_second": 173.594, |
|
"eval_steps_per_second": 2.714, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 12.031139419674451, |
|
"grad_norm": 4.318003177642822, |
|
"learning_rate": 4.398427260812582e-07, |
|
"loss": 0.3915, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 12.031139419674451, |
|
"eval_accuracy": 0.7562481330279797, |
|
"eval_f1": 0.7931034482758621, |
|
"eval_loss": 0.3860309422016144, |
|
"eval_precision": 0.6982142857142857, |
|
"eval_recall": 0.9178403755868545, |
|
"eval_runtime": 57.8089, |
|
"eval_samples_per_second": 173.728, |
|
"eval_steps_per_second": 2.716, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 12.738853503184714, |
|
"grad_norm": 15.07564926147461, |
|
"learning_rate": 3.3499344692005245e-07, |
|
"loss": 0.3888, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 12.738853503184714, |
|
"eval_accuracy": 0.7568455640744798, |
|
"eval_f1": 0.7816523605150214, |
|
"eval_loss": 0.3812848627567291, |
|
"eval_precision": 0.7198616600790514, |
|
"eval_recall": 0.8550469483568075, |
|
"eval_runtime": 57.8126, |
|
"eval_samples_per_second": 173.716, |
|
"eval_steps_per_second": 2.716, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 13.446567586694975, |
|
"grad_norm": 10.069337844848633, |
|
"learning_rate": 2.3014416775884665e-07, |
|
"loss": 0.3832, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 13.446567586694975, |
|
"eval_accuracy": 0.7569451359155631, |
|
"eval_f1": 0.7900576244947106, |
|
"eval_loss": 0.37964996695518494, |
|
"eval_precision": 0.7049884881043745, |
|
"eval_recall": 0.8984741784037559, |
|
"eval_runtime": 57.6196, |
|
"eval_samples_per_second": 174.298, |
|
"eval_steps_per_second": 2.725, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 14.154281670205236, |
|
"grad_norm": 8.055023193359375, |
|
"learning_rate": 1.252948885976409e-07, |
|
"loss": 0.383, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 14.154281670205236, |
|
"eval_accuracy": 0.7573434232798965, |
|
"eval_f1": 0.7904016513288037, |
|
"eval_loss": 0.3802996873855591, |
|
"eval_precision": 0.7052954719877207, |
|
"eval_recall": 0.8988654147104851, |
|
"eval_runtime": 57.777, |
|
"eval_samples_per_second": 173.824, |
|
"eval_steps_per_second": 2.717, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 14.8619957537155, |
|
"grad_norm": 2.661689281463623, |
|
"learning_rate": 2.0445609436435123e-08, |
|
"loss": 0.3823, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 14.8619957537155, |
|
"eval_accuracy": 0.7576421388031465, |
|
"eval_f1": 0.7906056434962148, |
|
"eval_loss": 0.37897399067878723, |
|
"eval_precision": 0.7056203931203932, |
|
"eval_recall": 0.8988654147104851, |
|
"eval_runtime": 57.6891, |
|
"eval_samples_per_second": 174.088, |
|
"eval_steps_per_second": 2.721, |
|
"step": 21000 |
|
} |
|
], |
|
"logging_steps": 1000, |
|
"max_steps": 21195, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 15, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.173409489802339e+17, |
|
"train_batch_size": 64, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|