{ "best_metric": 0.7399751659163123, "best_model_checkpoint": "/opt/dlami/nvme/shevtsov/sent_checkpoints/checkpoint-102510", "epoch": 10.0, "eval_steps": 500, "global_step": 102510, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "step": 10251, "train_accuracy": 0.9232076630758701, "train_f1": 0.9284970394554347, "train_loss": 0.08324012160301208, "train_precision": 0.9395995876182259, "train_recall": 0.9232076630758701, "train_runtime": 5077.4566, "train_samples_per_second": 775.226, "train_steps_per_second": 2.019 }, { "epoch": 1.0, "grad_norm": 2.202436923980713, "learning_rate": 5e-06, "loss": 0.1512, "step": 10251 }, { "epoch": 1.0, "eval_accuracy": 0.5906430303537085, "eval_f1": 0.5953154182610875, "eval_loss": 1.1822575330734253, "eval_precision": 0.7605817989645098, "eval_recall": 0.5906430303537085, "eval_runtime": 31.2064, "eval_samples_per_second": 766.446, "eval_steps_per_second": 2.019, "step": 10251 }, { "epoch": 2.0, "step": 20502, "train_accuracy": 0.9251504125579815, "train_f1": 0.9318479879378413, "train_loss": 0.06029369682073593, "train_precision": 0.9457024638382566, "train_recall": 0.9251504125579815, "train_runtime": 5078.2486, "train_samples_per_second": 775.105, "train_steps_per_second": 2.019 }, { "epoch": 2.0, "grad_norm": 2.3391969203948975, "learning_rate": 4.849231551964771e-06, "loss": 0.0727, "step": 20502 }, { "epoch": 2.0, "eval_accuracy": 0.5809850321933272, "eval_f1": 0.5827904211311477, "eval_loss": 0.8601031303405762, "eval_precision": 0.7707426917608643, "eval_recall": 0.5809850321933272, "eval_runtime": 31.1947, "eval_samples_per_second": 766.734, "eval_steps_per_second": 2.02, "step": 20502 }, { "epoch": 3.0, "step": 30753, "train_accuracy": 0.9311338878819889, "train_f1": 0.9367318532276315, "train_loss": 0.05340421944856644, "train_precision": 0.948615352312463, "train_recall": 0.9311338878819889, "train_runtime": 5078.9472, "train_samples_per_second": 774.998, "train_steps_per_second": 2.018 }, { "epoch": 3.0, "grad_norm": 3.368744134902954, "learning_rate": 4.415111107797445e-06, "loss": 0.064, "step": 30753 }, { "epoch": 3.0, "eval_accuracy": 0.6280207375198595, "eval_f1": 0.6341902420521265, "eval_loss": 1.0681450366973877, "eval_precision": 0.768741241964947, "eval_recall": 0.6280207375198595, "eval_runtime": 31.2028, "eval_samples_per_second": 766.535, "eval_steps_per_second": 2.019, "step": 30753 }, { "epoch": 4.0, "step": 41004, "train_accuracy": 0.941354218588914, "train_f1": 0.9444982035100681, "train_loss": 0.0493415892124176, "train_precision": 0.9518003283876015, "train_recall": 0.941354218588914, "train_runtime": 5080.4688, "train_samples_per_second": 774.766, "train_steps_per_second": 2.018 }, { "epoch": 4.0, "grad_norm": 7.862049579620361, "learning_rate": 3.7500000000000005e-06, "loss": 0.0585, "step": 41004 }, { "epoch": 4.0, "eval_accuracy": 0.6900660590350364, "eval_f1": 0.6986283009597273, "eval_loss": 1.4099539518356323, "eval_precision": 0.7671384440115526, "eval_recall": 0.6900660590350364, "eval_runtime": 31.2008, "eval_samples_per_second": 766.584, "eval_steps_per_second": 2.019, "step": 41004 }, { "epoch": 5.0, "step": 51255, "train_accuracy": 0.9479113474150279, "train_f1": 0.9498162368391557, "train_loss": 0.04905932769179344, "train_precision": 0.9544620214289802, "train_recall": 0.9479113474150279, "train_runtime": 5088.1804, "train_samples_per_second": 773.592, "train_steps_per_second": 2.015 }, { "epoch": 5.0, "grad_norm": 1.6737421751022339, "learning_rate": 2.9341204441673267e-06, "loss": 0.0543, "step": 51255 }, { "epoch": 5.0, "eval_accuracy": 0.7279454803913371, "eval_f1": 0.7344324841621389, "eval_loss": 2.154353618621826, "eval_precision": 0.7651689935562233, "eval_recall": 0.7279454803913371, "eval_runtime": 31.2388, "eval_samples_per_second": 765.65, "eval_steps_per_second": 2.017, "step": 51255 }, { "epoch": 6.0, "step": 61506, "train_accuracy": 0.944803507162031, "train_f1": 0.9475462857180277, "train_loss": 0.0426737517118454, "train_precision": 0.9540200071722176, "train_recall": 0.944803507162031, "train_runtime": 5078.4022, "train_samples_per_second": 775.081, "train_steps_per_second": 2.019 }, { "epoch": 6.0, "grad_norm": 2.879870653152466, "learning_rate": 2.0658795558326745e-06, "loss": 0.051, "step": 61506 }, { "epoch": 6.0, "eval_accuracy": 0.7079187223011958, "eval_f1": 0.7155283465045912, "eval_loss": 2.2507946491241455, "eval_precision": 0.7570941064333973, "eval_recall": 0.7079187223011958, "eval_runtime": 31.2026, "eval_samples_per_second": 766.538, "eval_steps_per_second": 2.019, "step": 61506 }, { "epoch": 7.0, "step": 71757, "train_accuracy": 0.9474675154096338, "train_f1": 0.9497025418465336, "train_loss": 0.0410270020365715, "train_precision": 0.9551267616744933, "train_recall": 0.9474675154096338, "train_runtime": 5080.7556, "train_samples_per_second": 774.722, "train_steps_per_second": 2.018 }, { "epoch": 7.0, "grad_norm": 1.4426418542861938, "learning_rate": 1.2500000000000007e-06, "loss": 0.0486, "step": 71757 }, { "epoch": 7.0, "eval_accuracy": 0.7198344343172506, "eval_f1": 0.726834352850576, "eval_loss": 2.45487380027771, "eval_precision": 0.7590566392960729, "eval_recall": 0.7198344343172506, "eval_runtime": 31.2049, "eval_samples_per_second": 766.482, "eval_steps_per_second": 2.019, "step": 71757 }, { "epoch": 8.0, "step": 82008, "train_accuracy": 0.9504046315025708, "train_f1": 0.9520763661297328, "train_loss": 0.0410199835896492, "train_precision": 0.9562815819971588, "train_recall": 0.9504046315025708, "train_runtime": 5079.0768, "train_samples_per_second": 774.978, "train_steps_per_second": 2.018 }, { "epoch": 8.0, "grad_norm": 1.1270148754119873, "learning_rate": 5.848888922025553e-07, "loss": 0.0468, "step": 82008 }, { "epoch": 8.0, "eval_accuracy": 0.7316665273016139, "eval_f1": 0.737640105520052, "eval_loss": 2.602576494216919, "eval_precision": 0.7612609852108917, "eval_recall": 0.7316665273016139, "eval_runtime": 31.1916, "eval_samples_per_second": 766.809, "eval_steps_per_second": 2.02, "step": 82008 }, { "epoch": 9.0, "step": 92259, "train_accuracy": 0.9505296259769004, "train_f1": 0.9522083394688067, "train_loss": 0.03998752683401108, "train_precision": 0.9564247374560226, "train_recall": 0.9505296259769004, "train_runtime": 5078.0822, "train_samples_per_second": 775.13, "train_steps_per_second": 2.019 }, { "epoch": 9.0, "grad_norm": 1.666390061378479, "learning_rate": 1.507684480352292e-07, "loss": 0.0457, "step": 92259 }, { "epoch": 9.0, "eval_accuracy": 0.7320010034283803, "eval_f1": 0.7378168031871946, "eval_loss": 2.8211002349853516, "eval_precision": 0.7595695441249473, "eval_recall": 0.7320010034283803, "eval_runtime": 31.2132, "eval_samples_per_second": 766.278, "eval_steps_per_second": 2.018, "step": 92259 }, { "epoch": 10.0, "step": 102510, "train_accuracy": 0.9510209660446921, "train_f1": 0.9526073589638447, "train_loss": 0.040298543870449066, "train_precision": 0.9566123120238171, "train_recall": 0.9510209660446921, "train_runtime": 5078.6508, "train_samples_per_second": 775.043, "train_steps_per_second": 2.018 }, { "epoch": 10.0, "grad_norm": 1.6141560077667236, "learning_rate": 0.0, "loss": 0.0451, "step": 102510 }, { "epoch": 10.0, "eval_accuracy": 0.7345095743791287, "eval_f1": 0.7399751659163123, "eval_loss": 2.884028911590576, "eval_precision": 0.759520061975123, "eval_recall": 0.7345095743791287, "eval_runtime": 31.2146, "eval_samples_per_second": 766.243, "eval_steps_per_second": 2.018, "step": 102510 } ], "logging_steps": 500, "max_steps": 102510, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.0357340434899927e+19, "train_batch_size": 48, "trial_name": null, "trial_params": null }