{ "best_metric": 0.6745283018867925, "best_model_checkpoint": "videomae-base-finetuned-engine-subset-20230313/checkpoint-988", "epoch": 29.00720720720721, "global_step": 1110, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 4.504504504504505e-06, "loss": 2.7278, "step": 10 }, { "epoch": 0.02, "learning_rate": 9.00900900900901e-06, "loss": 2.747, "step": 20 }, { "epoch": 0.03, "learning_rate": 1.3513513513513515e-05, "loss": 2.6212, "step": 30 }, { "epoch": 0.03, "eval_accuracy": 0.37735849056603776, "eval_loss": 2.362868070602417, "eval_runtime": 69.3989, "eval_samples_per_second": 6.11, "eval_steps_per_second": 1.023, "step": 38 }, { "epoch": 1.0, "learning_rate": 1.801801801801802e-05, "loss": 2.6599, "step": 40 }, { "epoch": 1.01, "learning_rate": 2.2522522522522523e-05, "loss": 2.5895, "step": 50 }, { "epoch": 1.02, "learning_rate": 2.702702702702703e-05, "loss": 2.5383, "step": 60 }, { "epoch": 1.03, "learning_rate": 3.153153153153153e-05, "loss": 2.455, "step": 70 }, { "epoch": 1.03, "eval_accuracy": 0.2169811320754717, "eval_loss": 2.3674099445343018, "eval_runtime": 70.0897, "eval_samples_per_second": 6.049, "eval_steps_per_second": 1.013, "step": 76 }, { "epoch": 2.0, "learning_rate": 3.603603603603604e-05, "loss": 2.421, "step": 80 }, { "epoch": 2.01, "learning_rate": 4.0540540540540545e-05, "loss": 2.4139, "step": 90 }, { "epoch": 2.02, "learning_rate": 4.5045045045045046e-05, "loss": 2.4999, "step": 100 }, { "epoch": 2.03, "learning_rate": 4.954954954954955e-05, "loss": 2.4311, "step": 110 }, { "epoch": 2.03, "eval_accuracy": 0.3231132075471698, "eval_loss": 2.2190816402435303, "eval_runtime": 71.2703, "eval_samples_per_second": 5.949, "eval_steps_per_second": 0.996, "step": 114 }, { "epoch": 3.01, "learning_rate": 4.954954954954955e-05, "loss": 2.5089, "step": 120 }, { "epoch": 3.01, "learning_rate": 4.9049049049049054e-05, "loss": 2.4986, "step": 130 }, { "epoch": 3.02, "learning_rate": 4.854854854854855e-05, "loss": 2.3317, "step": 140 }, { "epoch": 3.03, "learning_rate": 4.804804804804805e-05, "loss": 2.2768, "step": 150 }, { "epoch": 3.03, "eval_accuracy": 0.3608490566037736, "eval_loss": 2.1226866245269775, "eval_runtime": 70.1636, "eval_samples_per_second": 6.043, "eval_steps_per_second": 1.012, "step": 152 }, { "epoch": 4.01, "learning_rate": 4.754754754754755e-05, "loss": 2.0041, "step": 160 }, { "epoch": 4.02, "learning_rate": 4.704704704704705e-05, "loss": 2.2092, "step": 170 }, { "epoch": 4.03, "learning_rate": 4.654654654654655e-05, "loss": 1.9442, "step": 180 }, { "epoch": 4.03, "learning_rate": 4.604604604604605e-05, "loss": 1.7528, "step": 190 }, { "epoch": 4.03, "eval_accuracy": 0.4363207547169811, "eval_loss": 1.729621171951294, "eval_runtime": 69.2109, "eval_samples_per_second": 6.126, "eval_steps_per_second": 1.026, "step": 190 }, { "epoch": 5.01, "learning_rate": 4.5545545545545545e-05, "loss": 1.6692, "step": 200 }, { "epoch": 5.02, "learning_rate": 4.5045045045045046e-05, "loss": 1.8567, "step": 210 }, { "epoch": 5.03, "learning_rate": 4.4544544544544546e-05, "loss": 1.5381, "step": 220 }, { "epoch": 5.03, "eval_accuracy": 0.4339622641509434, "eval_loss": 1.5015883445739746, "eval_runtime": 71.2754, "eval_samples_per_second": 5.949, "eval_steps_per_second": 0.996, "step": 228 }, { "epoch": 6.0, "learning_rate": 4.404404404404405e-05, "loss": 1.4846, "step": 230 }, { "epoch": 6.01, "learning_rate": 4.354354354354355e-05, "loss": 1.3143, "step": 240 }, { "epoch": 6.02, "learning_rate": 4.304304304304305e-05, "loss": 1.2959, "step": 250 }, { "epoch": 6.03, "learning_rate": 4.254254254254255e-05, "loss": 1.407, "step": 260 }, { "epoch": 6.03, "eval_accuracy": 0.5448113207547169, "eval_loss": 1.2878233194351196, "eval_runtime": 70.9118, "eval_samples_per_second": 5.979, "eval_steps_per_second": 1.001, "step": 266 }, { "epoch": 7.0, "learning_rate": 4.204204204204204e-05, "loss": 1.3608, "step": 270 }, { "epoch": 7.01, "learning_rate": 4.1541541541541544e-05, "loss": 1.5729, "step": 280 }, { "epoch": 7.02, "learning_rate": 4.1041041041041045e-05, "loss": 1.3095, "step": 290 }, { "epoch": 7.03, "learning_rate": 4.0540540540540545e-05, "loss": 1.1053, "step": 300 }, { "epoch": 7.03, "eval_accuracy": 0.4009433962264151, "eval_loss": 1.5210211277008057, "eval_runtime": 68.6229, "eval_samples_per_second": 6.179, "eval_steps_per_second": 1.035, "step": 304 }, { "epoch": 8.01, "learning_rate": 4.0040040040040046e-05, "loss": 1.2462, "step": 310 }, { "epoch": 8.01, "learning_rate": 3.953953953953955e-05, "loss": 1.114, "step": 320 }, { "epoch": 8.02, "learning_rate": 3.903903903903904e-05, "loss": 1.2971, "step": 330 }, { "epoch": 8.03, "learning_rate": 3.8538538538538534e-05, "loss": 1.0893, "step": 340 }, { "epoch": 8.03, "eval_accuracy": 0.46226415094339623, "eval_loss": 1.3901519775390625, "eval_runtime": 70.4934, "eval_samples_per_second": 6.015, "eval_steps_per_second": 1.007, "step": 342 }, { "epoch": 9.01, "learning_rate": 3.8038038038038035e-05, "loss": 1.3258, "step": 350 }, { "epoch": 9.02, "learning_rate": 3.7537537537537536e-05, "loss": 1.1349, "step": 360 }, { "epoch": 9.03, "learning_rate": 3.7037037037037037e-05, "loss": 1.0029, "step": 370 }, { "epoch": 9.03, "learning_rate": 3.653653653653654e-05, "loss": 0.8136, "step": 380 }, { "epoch": 9.03, "eval_accuracy": 0.4033018867924528, "eval_loss": 1.6456053256988525, "eval_runtime": 71.0727, "eval_samples_per_second": 5.966, "eval_steps_per_second": 0.999, "step": 380 }, { "epoch": 10.01, "learning_rate": 3.603603603603604e-05, "loss": 1.1048, "step": 390 }, { "epoch": 10.02, "learning_rate": 3.553553553553554e-05, "loss": 1.2661, "step": 400 }, { "epoch": 10.03, "learning_rate": 3.503503503503503e-05, "loss": 0.9565, "step": 410 }, { "epoch": 10.03, "eval_accuracy": 0.5613207547169812, "eval_loss": 1.18259859085083, "eval_runtime": 70.181, "eval_samples_per_second": 6.042, "eval_steps_per_second": 1.012, "step": 418 }, { "epoch": 11.0, "learning_rate": 3.453453453453453e-05, "loss": 1.0921, "step": 420 }, { "epoch": 11.01, "learning_rate": 3.4034034034034034e-05, "loss": 0.7383, "step": 430 }, { "epoch": 11.02, "learning_rate": 3.3533533533533535e-05, "loss": 0.926, "step": 440 }, { "epoch": 11.03, "learning_rate": 3.3033033033033035e-05, "loss": 1.0147, "step": 450 }, { "epoch": 11.03, "eval_accuracy": 0.5117924528301887, "eval_loss": 1.2098637819290161, "eval_runtime": 70.8824, "eval_samples_per_second": 5.982, "eval_steps_per_second": 1.002, "step": 456 }, { "epoch": 12.0, "learning_rate": 3.2532532532532536e-05, "loss": 0.9178, "step": 460 }, { "epoch": 12.01, "learning_rate": 3.203203203203203e-05, "loss": 0.8994, "step": 470 }, { "epoch": 12.02, "learning_rate": 3.153153153153153e-05, "loss": 0.8648, "step": 480 }, { "epoch": 12.03, "learning_rate": 3.103103103103103e-05, "loss": 0.9125, "step": 490 }, { "epoch": 12.03, "eval_accuracy": 0.5495283018867925, "eval_loss": 1.1849919557571411, "eval_runtime": 70.4782, "eval_samples_per_second": 6.016, "eval_steps_per_second": 1.007, "step": 494 }, { "epoch": 13.01, "learning_rate": 3.053053053053053e-05, "loss": 0.8926, "step": 500 }, { "epoch": 13.01, "learning_rate": 3.0030030030030033e-05, "loss": 0.8618, "step": 510 }, { "epoch": 13.02, "learning_rate": 2.952952952952953e-05, "loss": 0.8057, "step": 520 }, { "epoch": 13.03, "learning_rate": 2.902902902902903e-05, "loss": 0.7091, "step": 530 }, { "epoch": 13.03, "eval_accuracy": 0.535377358490566, "eval_loss": 1.2324182987213135, "eval_runtime": 70.9353, "eval_samples_per_second": 5.977, "eval_steps_per_second": 1.001, "step": 532 }, { "epoch": 14.01, "learning_rate": 2.852852852852853e-05, "loss": 0.74, "step": 540 }, { "epoch": 14.02, "learning_rate": 2.8028028028028032e-05, "loss": 0.7484, "step": 550 }, { "epoch": 14.03, "learning_rate": 2.752752752752753e-05, "loss": 0.7962, "step": 560 }, { "epoch": 14.03, "learning_rate": 2.702702702702703e-05, "loss": 0.7361, "step": 570 }, { "epoch": 14.03, "eval_accuracy": 0.6226415094339622, "eval_loss": 1.022481083869934, "eval_runtime": 70.6845, "eval_samples_per_second": 5.998, "eval_steps_per_second": 1.004, "step": 570 }, { "epoch": 15.01, "learning_rate": 2.652652652652653e-05, "loss": 0.8396, "step": 580 }, { "epoch": 15.02, "learning_rate": 2.6026026026026028e-05, "loss": 0.7193, "step": 590 }, { "epoch": 15.03, "learning_rate": 2.552552552552553e-05, "loss": 0.6979, "step": 600 }, { "epoch": 15.03, "eval_accuracy": 0.5589622641509434, "eval_loss": 1.0737839937210083, "eval_runtime": 69.8991, "eval_samples_per_second": 6.066, "eval_steps_per_second": 1.016, "step": 608 }, { "epoch": 16.0, "learning_rate": 2.502502502502503e-05, "loss": 0.6869, "step": 610 }, { "epoch": 16.01, "learning_rate": 2.4524524524524527e-05, "loss": 0.6704, "step": 620 }, { "epoch": 16.02, "learning_rate": 2.4024024024024024e-05, "loss": 0.8011, "step": 630 }, { "epoch": 16.03, "learning_rate": 2.3523523523523525e-05, "loss": 0.5265, "step": 640 }, { "epoch": 16.03, "eval_accuracy": 0.5872641509433962, "eval_loss": 1.1062343120574951, "eval_runtime": 70.6095, "eval_samples_per_second": 6.005, "eval_steps_per_second": 1.006, "step": 646 }, { "epoch": 17.0, "learning_rate": 2.3023023023023026e-05, "loss": 0.6415, "step": 650 }, { "epoch": 17.01, "learning_rate": 2.2522522522522523e-05, "loss": 0.7402, "step": 660 }, { "epoch": 17.02, "learning_rate": 2.2022022022022024e-05, "loss": 0.6275, "step": 670 }, { "epoch": 17.03, "learning_rate": 2.1521521521521524e-05, "loss": 0.5651, "step": 680 }, { "epoch": 17.03, "eval_accuracy": 0.5801886792452831, "eval_loss": 1.1402446031570435, "eval_runtime": 70.0342, "eval_samples_per_second": 6.054, "eval_steps_per_second": 1.014, "step": 684 }, { "epoch": 18.01, "learning_rate": 2.102102102102102e-05, "loss": 0.5542, "step": 690 }, { "epoch": 18.01, "learning_rate": 2.0520520520520522e-05, "loss": 0.6903, "step": 700 }, { "epoch": 18.02, "learning_rate": 2.0020020020020023e-05, "loss": 0.7, "step": 710 }, { "epoch": 18.03, "learning_rate": 1.951951951951952e-05, "loss": 0.7182, "step": 720 }, { "epoch": 18.03, "eval_accuracy": 0.5801886792452831, "eval_loss": 1.097399115562439, "eval_runtime": 70.689, "eval_samples_per_second": 5.998, "eval_steps_per_second": 1.004, "step": 722 }, { "epoch": 19.01, "learning_rate": 1.9019019019019018e-05, "loss": 0.5921, "step": 730 }, { "epoch": 19.02, "learning_rate": 1.8518518518518518e-05, "loss": 0.5428, "step": 740 }, { "epoch": 19.03, "learning_rate": 1.801801801801802e-05, "loss": 0.7289, "step": 750 }, { "epoch": 19.03, "learning_rate": 1.7517517517517516e-05, "loss": 0.6582, "step": 760 }, { "epoch": 19.03, "eval_accuracy": 0.6179245283018868, "eval_loss": 1.0529001951217651, "eval_runtime": 70.8949, "eval_samples_per_second": 5.981, "eval_steps_per_second": 1.001, "step": 760 }, { "epoch": 20.01, "learning_rate": 1.7017017017017017e-05, "loss": 0.5001, "step": 770 }, { "epoch": 20.02, "learning_rate": 1.6516516516516518e-05, "loss": 0.5387, "step": 780 }, { "epoch": 20.03, "learning_rate": 1.6016016016016015e-05, "loss": 0.5709, "step": 790 }, { "epoch": 20.03, "eval_accuracy": 0.6344339622641509, "eval_loss": 0.965450644493103, "eval_runtime": 69.2205, "eval_samples_per_second": 6.125, "eval_steps_per_second": 1.026, "step": 798 }, { "epoch": 21.0, "learning_rate": 1.5515515515515516e-05, "loss": 0.7102, "step": 800 }, { "epoch": 21.01, "learning_rate": 1.5015015015015016e-05, "loss": 0.4746, "step": 810 }, { "epoch": 21.02, "learning_rate": 1.4514514514514515e-05, "loss": 0.611, "step": 820 }, { "epoch": 21.03, "learning_rate": 1.4014014014014016e-05, "loss": 0.4808, "step": 830 }, { "epoch": 21.03, "eval_accuracy": 0.6226415094339622, "eval_loss": 1.0440747737884521, "eval_runtime": 70.6133, "eval_samples_per_second": 6.005, "eval_steps_per_second": 1.005, "step": 836 }, { "epoch": 22.0, "learning_rate": 1.3513513513513515e-05, "loss": 0.5081, "step": 840 }, { "epoch": 22.01, "learning_rate": 1.3013013013013014e-05, "loss": 0.6717, "step": 850 }, { "epoch": 22.02, "learning_rate": 1.2512512512512515e-05, "loss": 0.343, "step": 860 }, { "epoch": 22.03, "learning_rate": 1.2012012012012012e-05, "loss": 0.5816, "step": 870 }, { "epoch": 22.03, "eval_accuracy": 0.6438679245283019, "eval_loss": 0.9445228576660156, "eval_runtime": 70.5922, "eval_samples_per_second": 6.006, "eval_steps_per_second": 1.006, "step": 874 }, { "epoch": 23.01, "learning_rate": 1.1511511511511513e-05, "loss": 0.4179, "step": 880 }, { "epoch": 23.01, "learning_rate": 1.1011011011011012e-05, "loss": 0.4346, "step": 890 }, { "epoch": 23.02, "learning_rate": 1.051051051051051e-05, "loss": 0.4038, "step": 900 }, { "epoch": 23.03, "learning_rate": 1.0010010010010011e-05, "loss": 0.5057, "step": 910 }, { "epoch": 23.03, "eval_accuracy": 0.6320754716981132, "eval_loss": 1.0248113870620728, "eval_runtime": 71.1172, "eval_samples_per_second": 5.962, "eval_steps_per_second": 0.998, "step": 912 }, { "epoch": 24.01, "learning_rate": 9.509509509509509e-06, "loss": 0.393, "step": 920 }, { "epoch": 24.02, "learning_rate": 9.00900900900901e-06, "loss": 0.4297, "step": 930 }, { "epoch": 24.03, "learning_rate": 8.508508508508508e-06, "loss": 0.5053, "step": 940 }, { "epoch": 24.03, "learning_rate": 8.008008008008007e-06, "loss": 0.6253, "step": 950 }, { "epoch": 24.03, "eval_accuracy": 0.660377358490566, "eval_loss": 0.9517852067947388, "eval_runtime": 70.664, "eval_samples_per_second": 6.0, "eval_steps_per_second": 1.005, "step": 950 }, { "epoch": 25.01, "learning_rate": 7.507507507507508e-06, "loss": 0.4143, "step": 960 }, { "epoch": 25.02, "learning_rate": 7.007007007007008e-06, "loss": 0.6003, "step": 970 }, { "epoch": 25.03, "learning_rate": 6.506506506506507e-06, "loss": 0.6841, "step": 980 }, { "epoch": 25.03, "eval_accuracy": 0.6745283018867925, "eval_loss": 0.8912516236305237, "eval_runtime": 70.8493, "eval_samples_per_second": 5.985, "eval_steps_per_second": 1.002, "step": 988 }, { "epoch": 26.0, "learning_rate": 6.006006006006006e-06, "loss": 0.2104, "step": 990 }, { "epoch": 26.01, "learning_rate": 5.505505505505506e-06, "loss": 0.6054, "step": 1000 }, { "epoch": 26.02, "learning_rate": 5.005005005005006e-06, "loss": 0.6045, "step": 1010 }, { "epoch": 26.03, "learning_rate": 4.504504504504505e-06, "loss": 0.5933, "step": 1020 }, { "epoch": 26.03, "eval_accuracy": 0.6438679245283019, "eval_loss": 0.9012843370437622, "eval_runtime": 71.2482, "eval_samples_per_second": 5.951, "eval_steps_per_second": 0.997, "step": 1026 }, { "epoch": 27.0, "learning_rate": 4.004004004004004e-06, "loss": 0.4052, "step": 1030 }, { "epoch": 27.01, "learning_rate": 3.503503503503504e-06, "loss": 0.4422, "step": 1040 }, { "epoch": 27.02, "learning_rate": 3.003003003003003e-06, "loss": 0.4861, "step": 1050 }, { "epoch": 27.03, "learning_rate": 2.502502502502503e-06, "loss": 0.389, "step": 1060 }, { "epoch": 27.03, "eval_accuracy": 0.6627358490566038, "eval_loss": 0.9089723229408264, "eval_runtime": 71.3879, "eval_samples_per_second": 5.939, "eval_steps_per_second": 0.995, "step": 1064 }, { "epoch": 28.01, "learning_rate": 2.002002002002002e-06, "loss": 0.4572, "step": 1070 }, { "epoch": 28.01, "learning_rate": 1.5015015015015015e-06, "loss": 0.213, "step": 1080 }, { "epoch": 28.02, "learning_rate": 1.001001001001001e-06, "loss": 0.6041, "step": 1090 }, { "epoch": 28.03, "learning_rate": 5.005005005005005e-07, "loss": 0.3705, "step": 1100 }, { "epoch": 28.03, "eval_accuracy": 0.6721698113207547, "eval_loss": 0.8935866355895996, "eval_runtime": 70.2956, "eval_samples_per_second": 6.032, "eval_steps_per_second": 1.01, "step": 1102 }, { "epoch": 29.01, "learning_rate": 0.0, "loss": 0.6043, "step": 1110 }, { "epoch": 29.01, "eval_accuracy": 0.6721698113207547, "eval_loss": 0.8942137956619263, "eval_runtime": 69.8197, "eval_samples_per_second": 6.073, "eval_steps_per_second": 1.017, "step": 1110 }, { "epoch": 29.01, "step": 1110, "total_flos": 8.191332105366897e+18, "train_loss": 1.059841639716346, "train_runtime": 5977.6934, "train_samples_per_second": 1.114, "train_steps_per_second": 0.186 }, { "epoch": 29.01, "eval_accuracy": 0.6745283018867925, "eval_loss": 0.8912516236305237, "eval_runtime": 68.8583, "eval_samples_per_second": 6.158, "eval_steps_per_second": 1.031, "step": 1110 }, { "epoch": 29.01, "eval_accuracy": 0.6745283018867925, "eval_loss": 0.8912516236305237, "eval_runtime": 68.7355, "eval_samples_per_second": 6.169, "eval_steps_per_second": 1.033, "step": 1110 } ], "max_steps": 1110, "num_train_epochs": 9223372036854775807, "total_flos": 8.191332105366897e+18, "trial_name": null, "trial_params": null }