Muffakir_Embedding / trainer_state.json
mohamed2811's picture
Upload 16 files
5db55c7 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 100,
"global_step": 3039,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.09871668311944719,
"grad_norm": 7.595918655395508,
"learning_rate": 6.578947368421054e-06,
"loss": 0.4785,
"step": 100
},
{
"epoch": 0.09871668311944719,
"eval_custom_dataset_evaluation_cosine_accuracy@1": 0.48153268812956745,
"eval_custom_dataset_evaluation_cosine_accuracy@10": 0.8239186253209559,
"eval_custom_dataset_evaluation_cosine_accuracy@3": 0.6804266245309105,
"eval_custom_dataset_evaluation_cosine_accuracy@5": 0.7476298637171638,
"eval_custom_dataset_evaluation_cosine_map@100": 0.6014421644194009,
"eval_custom_dataset_evaluation_cosine_mrr@10": 0.5954963687693909,
"eval_custom_dataset_evaluation_cosine_ndcg@10": 0.650756003676077,
"eval_custom_dataset_evaluation_cosine_precision@1": 0.48153268812956745,
"eval_custom_dataset_evaluation_cosine_precision@10": 0.0823918625320956,
"eval_custom_dataset_evaluation_cosine_precision@3": 0.22680887484363688,
"eval_custom_dataset_evaluation_cosine_precision@5": 0.14952597274343274,
"eval_custom_dataset_evaluation_cosine_recall@1": 0.48153268812956745,
"eval_custom_dataset_evaluation_cosine_recall@10": 0.8239186253209559,
"eval_custom_dataset_evaluation_cosine_recall@3": 0.6804266245309105,
"eval_custom_dataset_evaluation_cosine_recall@5": 0.7476298637171638,
"eval_loss": 0.4484286308288574,
"eval_runtime": 204.03,
"eval_samples_per_second": 9.925,
"eval_steps_per_second": 0.622,
"step": 100
},
{
"epoch": 0.19743336623889438,
"grad_norm": 10.92927074432373,
"learning_rate": 1.3157894736842108e-05,
"loss": 0.4112,
"step": 200
},
{
"epoch": 0.19743336623889438,
"eval_custom_dataset_evaluation_cosine_accuracy@1": 0.5033083152281256,
"eval_custom_dataset_evaluation_cosine_accuracy@10": 0.8463855421686747,
"eval_custom_dataset_evaluation_cosine_accuracy@3": 0.7041279873592732,
"eval_custom_dataset_evaluation_cosine_accuracy@5": 0.772615050365396,
"eval_custom_dataset_evaluation_cosine_map@100": 0.624250951720462,
"eval_custom_dataset_evaluation_cosine_mrr@10": 0.6187061477761295,
"eval_custom_dataset_evaluation_cosine_ndcg@10": 0.6738741593374042,
"eval_custom_dataset_evaluation_cosine_precision@1": 0.5033083152281256,
"eval_custom_dataset_evaluation_cosine_precision@10": 0.08463855421686747,
"eval_custom_dataset_evaluation_cosine_precision@3": 0.2347093291197577,
"eval_custom_dataset_evaluation_cosine_precision@5": 0.1545230100730792,
"eval_custom_dataset_evaluation_cosine_recall@1": 0.5033083152281256,
"eval_custom_dataset_evaluation_cosine_recall@10": 0.8463855421686747,
"eval_custom_dataset_evaluation_cosine_recall@3": 0.7041279873592732,
"eval_custom_dataset_evaluation_cosine_recall@5": 0.772615050365396,
"eval_loss": 0.3707010746002197,
"eval_runtime": 203.8165,
"eval_samples_per_second": 9.935,
"eval_steps_per_second": 0.623,
"step": 200
},
{
"epoch": 0.29615004935834155,
"grad_norm": 33.57826232910156,
"learning_rate": 1.9736842105263158e-05,
"loss": 0.2838,
"step": 300
},
{
"epoch": 0.29615004935834155,
"eval_custom_dataset_evaluation_cosine_accuracy@1": 0.5243432747382974,
"eval_custom_dataset_evaluation_cosine_accuracy@10": 0.860655737704918,
"eval_custom_dataset_evaluation_cosine_accuracy@3": 0.7234841003357693,
"eval_custom_dataset_evaluation_cosine_accuracy@5": 0.7913292514319573,
"eval_custom_dataset_evaluation_cosine_map@100": 0.6432678473897245,
"eval_custom_dataset_evaluation_cosine_mrr@10": 0.6380460561998335,
"eval_custom_dataset_evaluation_cosine_ndcg@10": 0.6920347683997495,
"eval_custom_dataset_evaluation_cosine_precision@1": 0.5243432747382974,
"eval_custom_dataset_evaluation_cosine_precision@10": 0.0860655737704918,
"eval_custom_dataset_evaluation_cosine_precision@3": 0.24116136677858976,
"eval_custom_dataset_evaluation_cosine_precision@5": 0.15826585028639148,
"eval_custom_dataset_evaluation_cosine_recall@1": 0.5243432747382974,
"eval_custom_dataset_evaluation_cosine_recall@10": 0.860655737704918,
"eval_custom_dataset_evaluation_cosine_recall@3": 0.7234841003357693,
"eval_custom_dataset_evaluation_cosine_recall@5": 0.7913292514319573,
"eval_loss": 0.32815688848495483,
"eval_runtime": 203.3881,
"eval_samples_per_second": 9.956,
"eval_steps_per_second": 0.624,
"step": 300
},
{
"epoch": 0.39486673247778875,
"grad_norm": 2.814175605773926,
"learning_rate": 1.9297989031078612e-05,
"loss": 0.2422,
"step": 400
},
{
"epoch": 0.39486673247778875,
"eval_custom_dataset_evaluation_cosine_accuracy@1": 0.5371321350977681,
"eval_custom_dataset_evaluation_cosine_accuracy@10": 0.8726051747975508,
"eval_custom_dataset_evaluation_cosine_accuracy@3": 0.7371123839620778,
"eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8044143788267826,
"eval_custom_dataset_evaluation_cosine_map@100": 0.6563831674366017,
"eval_custom_dataset_evaluation_cosine_mrr@10": 0.6514166541262787,
"eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7051169138321652,
"eval_custom_dataset_evaluation_cosine_precision@1": 0.5371321350977681,
"eval_custom_dataset_evaluation_cosine_precision@10": 0.08726051747975509,
"eval_custom_dataset_evaluation_cosine_precision@3": 0.24570412798735927,
"eval_custom_dataset_evaluation_cosine_precision@5": 0.1608828757653565,
"eval_custom_dataset_evaluation_cosine_recall@1": 0.5371321350977681,
"eval_custom_dataset_evaluation_cosine_recall@10": 0.8726051747975508,
"eval_custom_dataset_evaluation_cosine_recall@3": 0.7371123839620778,
"eval_custom_dataset_evaluation_cosine_recall@5": 0.8044143788267826,
"eval_loss": 0.28870803117752075,
"eval_runtime": 203.6125,
"eval_samples_per_second": 9.945,
"eval_steps_per_second": 0.624,
"step": 400
},
{
"epoch": 0.49358341559723595,
"grad_norm": 7.553986072540283,
"learning_rate": 1.856672760511883e-05,
"loss": 0.2369,
"step": 500
},
{
"epoch": 0.49358341559723595,
"eval_custom_dataset_evaluation_cosine_accuracy@1": 0.5465139245506616,
"eval_custom_dataset_evaluation_cosine_accuracy@10": 0.8817894528935414,
"eval_custom_dataset_evaluation_cosine_accuracy@3": 0.7465929290934229,
"eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8143393245111594,
"eval_custom_dataset_evaluation_cosine_map@100": 0.6652980546980072,
"eval_custom_dataset_evaluation_cosine_mrr@10": 0.6607006284309909,
"eval_custom_dataset_evaluation_cosine_ndcg@10": 0.714385034123883,
"eval_custom_dataset_evaluation_cosine_precision@1": 0.5465139245506616,
"eval_custom_dataset_evaluation_cosine_precision@10": 0.08817894528935415,
"eval_custom_dataset_evaluation_cosine_precision@3": 0.24886430969780762,
"eval_custom_dataset_evaluation_cosine_precision@5": 0.16286786490223187,
"eval_custom_dataset_evaluation_cosine_recall@1": 0.5465139245506616,
"eval_custom_dataset_evaluation_cosine_recall@10": 0.8817894528935414,
"eval_custom_dataset_evaluation_cosine_recall@3": 0.7465929290934229,
"eval_custom_dataset_evaluation_cosine_recall@5": 0.8143393245111594,
"eval_loss": 0.28374621272087097,
"eval_runtime": 204.4559,
"eval_samples_per_second": 9.904,
"eval_steps_per_second": 0.621,
"step": 500
},
{
"epoch": 0.5923000987166831,
"grad_norm": 32.98969268798828,
"learning_rate": 1.783546617915905e-05,
"loss": 0.2899,
"step": 600
},
{
"epoch": 0.5923000987166831,
"eval_custom_dataset_evaluation_cosine_accuracy@1": 0.5526367766146554,
"eval_custom_dataset_evaluation_cosine_accuracy@10": 0.8908749753110804,
"eval_custom_dataset_evaluation_cosine_accuracy@3": 0.7583448548291527,
"eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8260912502468892,
"eval_custom_dataset_evaluation_cosine_map@100": 0.6738304576911812,
"eval_custom_dataset_evaluation_cosine_mrr@10": 0.669296996730104,
"eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7232271970632966,
"eval_custom_dataset_evaluation_cosine_precision@1": 0.5526367766146554,
"eval_custom_dataset_evaluation_cosine_precision@10": 0.08908749753110803,
"eval_custom_dataset_evaluation_cosine_precision@3": 0.25278161827638423,
"eval_custom_dataset_evaluation_cosine_precision@5": 0.16521825004937787,
"eval_custom_dataset_evaluation_cosine_recall@1": 0.5526367766146554,
"eval_custom_dataset_evaluation_cosine_recall@10": 0.8908749753110804,
"eval_custom_dataset_evaluation_cosine_recall@3": 0.7583448548291527,
"eval_custom_dataset_evaluation_cosine_recall@5": 0.8260912502468892,
"eval_loss": 0.26842400431632996,
"eval_runtime": 203.2737,
"eval_samples_per_second": 9.962,
"eval_steps_per_second": 0.625,
"step": 600
},
{
"epoch": 0.6910167818361304,
"grad_norm": 9.886919021606445,
"learning_rate": 1.710420475319927e-05,
"loss": 0.1801,
"step": 700
},
{
"epoch": 0.6910167818361304,
"eval_custom_dataset_evaluation_cosine_accuracy@1": 0.5653268812956745,
"eval_custom_dataset_evaluation_cosine_accuracy@10": 0.8955164921983014,
"eval_custom_dataset_evaluation_cosine_accuracy@3": 0.7669365988544341,
"eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8332510369346238,
"eval_custom_dataset_evaluation_cosine_map@100": 0.683737510172223,
"eval_custom_dataset_evaluation_cosine_mrr@10": 0.6793955094382355,
"eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7320247334733945,
"eval_custom_dataset_evaluation_cosine_precision@1": 0.5653268812956745,
"eval_custom_dataset_evaluation_cosine_precision@10": 0.08955164921983014,
"eval_custom_dataset_evaluation_cosine_precision@3": 0.255645532951478,
"eval_custom_dataset_evaluation_cosine_precision@5": 0.16665020738692476,
"eval_custom_dataset_evaluation_cosine_recall@1": 0.5653268812956745,
"eval_custom_dataset_evaluation_cosine_recall@10": 0.8955164921983014,
"eval_custom_dataset_evaluation_cosine_recall@3": 0.7669365988544341,
"eval_custom_dataset_evaluation_cosine_recall@5": 0.8332510369346238,
"eval_loss": 0.2664617896080017,
"eval_runtime": 203.9076,
"eval_samples_per_second": 9.931,
"eval_steps_per_second": 0.623,
"step": 700
},
{
"epoch": 0.7897334649555775,
"grad_norm": 17.79576873779297,
"learning_rate": 1.637294332723949e-05,
"loss": 0.2279,
"step": 800
},
{
"epoch": 0.7897334649555775,
"eval_custom_dataset_evaluation_cosine_accuracy@1": 0.5654256369741261,
"eval_custom_dataset_evaluation_cosine_accuracy@10": 0.8918131542563698,
"eval_custom_dataset_evaluation_cosine_accuracy@3": 0.7636282836263085,
"eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8272763183883073,
"eval_custom_dataset_evaluation_cosine_map@100": 0.6824116708533701,
"eval_custom_dataset_evaluation_cosine_mrr@10": 0.6777940607081065,
"eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7298590424609255,
"eval_custom_dataset_evaluation_cosine_precision@1": 0.5654256369741261,
"eval_custom_dataset_evaluation_cosine_precision@10": 0.08918131542563698,
"eval_custom_dataset_evaluation_cosine_precision@3": 0.2545427612087695,
"eval_custom_dataset_evaluation_cosine_precision@5": 0.16545526367766147,
"eval_custom_dataset_evaluation_cosine_recall@1": 0.5654256369741261,
"eval_custom_dataset_evaluation_cosine_recall@10": 0.8918131542563698,
"eval_custom_dataset_evaluation_cosine_recall@3": 0.7636282836263085,
"eval_custom_dataset_evaluation_cosine_recall@5": 0.8272763183883073,
"eval_loss": 0.2617240250110626,
"eval_runtime": 203.217,
"eval_samples_per_second": 9.965,
"eval_steps_per_second": 0.625,
"step": 800
},
{
"epoch": 0.8884501480750246,
"grad_norm": 29.348552703857422,
"learning_rate": 1.564168190127971e-05,
"loss": 0.2051,
"step": 900
},
{
"epoch": 0.8884501480750246,
"eval_custom_dataset_evaluation_cosine_accuracy@1": 0.5677463954177365,
"eval_custom_dataset_evaluation_cosine_accuracy@10": 0.8971953387319771,
"eval_custom_dataset_evaluation_cosine_accuracy@3": 0.7670353545328856,
"eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8318190795970768,
"eval_custom_dataset_evaluation_cosine_map@100": 0.6851778091536691,
"eval_custom_dataset_evaluation_cosine_mrr@10": 0.6808611691104925,
"eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7334481505960582,
"eval_custom_dataset_evaluation_cosine_precision@1": 0.5677463954177365,
"eval_custom_dataset_evaluation_cosine_precision@10": 0.08971953387319773,
"eval_custom_dataset_evaluation_cosine_precision@3": 0.25567845151096186,
"eval_custom_dataset_evaluation_cosine_precision@5": 0.16636381591941535,
"eval_custom_dataset_evaluation_cosine_recall@1": 0.5677463954177365,
"eval_custom_dataset_evaluation_cosine_recall@10": 0.8971953387319771,
"eval_custom_dataset_evaluation_cosine_recall@3": 0.7670353545328856,
"eval_custom_dataset_evaluation_cosine_recall@5": 0.8318190795970768,
"eval_loss": 0.26834186911582947,
"eval_runtime": 203.4633,
"eval_samples_per_second": 9.953,
"eval_steps_per_second": 0.624,
"step": 900
},
{
"epoch": 0.9871668311944719,
"grad_norm": 0.5696656703948975,
"learning_rate": 1.491042047531993e-05,
"loss": 0.2097,
"step": 1000
},
{
"epoch": 0.9871668311944719,
"eval_custom_dataset_evaluation_cosine_accuracy@1": 0.5728323128579893,
"eval_custom_dataset_evaluation_cosine_accuracy@10": 0.9064783725064192,
"eval_custom_dataset_evaluation_cosine_accuracy@3": 0.7784910132332609,
"eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8429290934228718,
"eval_custom_dataset_evaluation_cosine_map@100": 0.692767963880535,
"eval_custom_dataset_evaluation_cosine_mrr@10": 0.6886411963231704,
"eval_custom_dataset_evaluation_cosine_ndcg@10": 0.741697294005231,
"eval_custom_dataset_evaluation_cosine_precision@1": 0.5728323128579893,
"eval_custom_dataset_evaluation_cosine_precision@10": 0.09064783725064192,
"eval_custom_dataset_evaluation_cosine_precision@3": 0.259497004411087,
"eval_custom_dataset_evaluation_cosine_precision@5": 0.1685858186845744,
"eval_custom_dataset_evaluation_cosine_recall@1": 0.5728323128579893,
"eval_custom_dataset_evaluation_cosine_recall@10": 0.9064783725064192,
"eval_custom_dataset_evaluation_cosine_recall@3": 0.7784910132332609,
"eval_custom_dataset_evaluation_cosine_recall@5": 0.8429290934228718,
"eval_loss": 0.2445395439863205,
"eval_runtime": 203.8284,
"eval_samples_per_second": 9.935,
"eval_steps_per_second": 0.623,
"step": 1000
},
{
"epoch": 1.085883514313919,
"grad_norm": 5.538768291473389,
"learning_rate": 1.4179159049360148e-05,
"loss": 0.1047,
"step": 1100
},
{
"epoch": 1.085883514313919,
"eval_custom_dataset_evaluation_cosine_accuracy@1": 0.5813253012048193,
"eval_custom_dataset_evaluation_cosine_accuracy@10": 0.9092435315030615,
"eval_custom_dataset_evaluation_cosine_accuracy@3": 0.7825399960497729,
"eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8449535848311278,
"eval_custom_dataset_evaluation_cosine_map@100": 0.6984402538800855,
"eval_custom_dataset_evaluation_cosine_mrr@10": 0.6944231453526992,
"eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7466897096167746,
"eval_custom_dataset_evaluation_cosine_precision@1": 0.5813253012048193,
"eval_custom_dataset_evaluation_cosine_precision@10": 0.09092435315030614,
"eval_custom_dataset_evaluation_cosine_precision@3": 0.2608466653499243,
"eval_custom_dataset_evaluation_cosine_precision@5": 0.16899071696622553,
"eval_custom_dataset_evaluation_cosine_recall@1": 0.5813253012048193,
"eval_custom_dataset_evaluation_cosine_recall@10": 0.9092435315030615,
"eval_custom_dataset_evaluation_cosine_recall@3": 0.7825399960497729,
"eval_custom_dataset_evaluation_cosine_recall@5": 0.8449535848311278,
"eval_loss": 0.23973241448402405,
"eval_runtime": 203.3277,
"eval_samples_per_second": 9.959,
"eval_steps_per_second": 0.625,
"step": 1100
},
{
"epoch": 1.1846001974333662,
"grad_norm": 6.3217902183532715,
"learning_rate": 1.3447897623400368e-05,
"loss": 0.0984,
"step": 1200
},
{
"epoch": 1.1846001974333662,
"eval_custom_dataset_evaluation_cosine_accuracy@1": 0.5921390479952597,
"eval_custom_dataset_evaluation_cosine_accuracy@10": 0.9151688722101521,
"eval_custom_dataset_evaluation_cosine_accuracy@3": 0.7911317400750543,
"eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8541378629271184,
"eval_custom_dataset_evaluation_cosine_map@100": 0.7083777420863926,
"eval_custom_dataset_evaluation_cosine_mrr@10": 0.7045087061752225,
"eval_custom_dataset_evaluation_cosine_ndcg@10": 0.755835586028989,
"eval_custom_dataset_evaluation_cosine_precision@1": 0.5921390479952597,
"eval_custom_dataset_evaluation_cosine_precision@10": 0.09151688722101521,
"eval_custom_dataset_evaluation_cosine_precision@3": 0.2637105800250181,
"eval_custom_dataset_evaluation_cosine_precision@5": 0.1708275725854237,
"eval_custom_dataset_evaluation_cosine_recall@1": 0.5921390479952597,
"eval_custom_dataset_evaluation_cosine_recall@10": 0.9151688722101521,
"eval_custom_dataset_evaluation_cosine_recall@3": 0.7911317400750543,
"eval_custom_dataset_evaluation_cosine_recall@5": 0.8541378629271184,
"eval_loss": 0.2229250818490982,
"eval_runtime": 203.6954,
"eval_samples_per_second": 9.941,
"eval_steps_per_second": 0.623,
"step": 1200
},
{
"epoch": 1.2833168805528135,
"grad_norm": 11.505134582519531,
"learning_rate": 1.2716636197440586e-05,
"loss": 0.0498,
"step": 1300
},
{
"epoch": 1.2833168805528135,
"eval_custom_dataset_evaluation_cosine_accuracy@1": 0.5782638751728224,
"eval_custom_dataset_evaluation_cosine_accuracy@10": 0.9055895714003556,
"eval_custom_dataset_evaluation_cosine_accuracy@3": 0.7789354137862927,
"eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8386825992494569,
"eval_custom_dataset_evaluation_cosine_map@100": 0.6954513367155198,
"eval_custom_dataset_evaluation_cosine_mrr@10": 0.6912109797503966,
"eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7433337553688117,
"eval_custom_dataset_evaluation_cosine_precision@1": 0.5782638751728224,
"eval_custom_dataset_evaluation_cosine_precision@10": 0.09055895714003556,
"eval_custom_dataset_evaluation_cosine_precision@3": 0.2596451379287642,
"eval_custom_dataset_evaluation_cosine_precision@5": 0.16773651984989138,
"eval_custom_dataset_evaluation_cosine_recall@1": 0.5782638751728224,
"eval_custom_dataset_evaluation_cosine_recall@10": 0.9055895714003556,
"eval_custom_dataset_evaluation_cosine_recall@3": 0.7789354137862927,
"eval_custom_dataset_evaluation_cosine_recall@5": 0.8386825992494569,
"eval_loss": 0.2586788237094879,
"eval_runtime": 204.0885,
"eval_samples_per_second": 9.922,
"eval_steps_per_second": 0.622,
"step": 1300
},
{
"epoch": 1.3820335636722607,
"grad_norm": 1.1614787578582764,
"learning_rate": 1.1985374771480804e-05,
"loss": 0.0993,
"step": 1400
},
{
"epoch": 1.3820335636722607,
"eval_custom_dataset_evaluation_cosine_accuracy@1": 0.59411416156429,
"eval_custom_dataset_evaluation_cosine_accuracy@10": 0.9151194943709263,
"eval_custom_dataset_evaluation_cosine_accuracy@3": 0.7925636974126012,
"eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8543353742840213,
"eval_custom_dataset_evaluation_cosine_map@100": 0.7096847447019631,
"eval_custom_dataset_evaluation_cosine_mrr@10": 0.705814299351989,
"eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7568020206373794,
"eval_custom_dataset_evaluation_cosine_precision@1": 0.59411416156429,
"eval_custom_dataset_evaluation_cosine_precision@10": 0.09151194943709264,
"eval_custom_dataset_evaluation_cosine_precision@3": 0.2641878991375337,
"eval_custom_dataset_evaluation_cosine_precision@5": 0.17086707485680427,
"eval_custom_dataset_evaluation_cosine_recall@1": 0.59411416156429,
"eval_custom_dataset_evaluation_cosine_recall@10": 0.9151194943709263,
"eval_custom_dataset_evaluation_cosine_recall@3": 0.7925636974126012,
"eval_custom_dataset_evaluation_cosine_recall@5": 0.8543353742840213,
"eval_loss": 0.2147156298160553,
"eval_runtime": 204.4732,
"eval_samples_per_second": 9.903,
"eval_steps_per_second": 0.621,
"step": 1400
},
{
"epoch": 1.4807502467917077,
"grad_norm": 1.6429851055145264,
"learning_rate": 1.1254113345521024e-05,
"loss": 0.0621,
"step": 1500
},
{
"epoch": 1.4807502467917077,
"eval_custom_dataset_evaluation_cosine_accuracy@1": 0.5962867864902232,
"eval_custom_dataset_evaluation_cosine_accuracy@10": 0.9217855026664034,
"eval_custom_dataset_evaluation_cosine_accuracy@3": 0.7997234841003358,
"eval_custom_dataset_evaluation_cosine_accuracy@5": 0.860853249061821,
"eval_custom_dataset_evaluation_cosine_map@100": 0.7136002941229976,
"eval_custom_dataset_evaluation_cosine_mrr@10": 0.7100783500904636,
"eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7616828218263099,
"eval_custom_dataset_evaluation_cosine_precision@1": 0.5962867864902232,
"eval_custom_dataset_evaluation_cosine_precision@10": 0.09217855026664035,
"eval_custom_dataset_evaluation_cosine_precision@3": 0.2665744947001119,
"eval_custom_dataset_evaluation_cosine_precision@5": 0.1721706498123642,
"eval_custom_dataset_evaluation_cosine_recall@1": 0.5962867864902232,
"eval_custom_dataset_evaluation_cosine_recall@10": 0.9217855026664034,
"eval_custom_dataset_evaluation_cosine_recall@3": 0.7997234841003358,
"eval_custom_dataset_evaluation_cosine_recall@5": 0.860853249061821,
"eval_loss": 0.20447228848934174,
"eval_runtime": 204.051,
"eval_samples_per_second": 9.924,
"eval_steps_per_second": 0.622,
"step": 1500
},
{
"epoch": 1.579466929911155,
"grad_norm": 15.424310684204102,
"learning_rate": 1.0522851919561243e-05,
"loss": 0.0922,
"step": 1600
},
{
"epoch": 1.579466929911155,
"eval_custom_dataset_evaluation_cosine_accuracy@1": 0.5950029626703536,
"eval_custom_dataset_evaluation_cosine_accuracy@10": 0.9201560339719533,
"eval_custom_dataset_evaluation_cosine_accuracy@3": 0.7980446375666601,
"eval_custom_dataset_evaluation_cosine_accuracy@5": 0.858038712225953,
"eval_custom_dataset_evaluation_cosine_map@100": 0.7121969229630567,
"eval_custom_dataset_evaluation_cosine_mrr@10": 0.7085377027234775,
"eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7601050276866773,
"eval_custom_dataset_evaluation_cosine_precision@1": 0.5950029626703536,
"eval_custom_dataset_evaluation_cosine_precision@10": 0.09201560339719535,
"eval_custom_dataset_evaluation_cosine_precision@3": 0.26601487918888667,
"eval_custom_dataset_evaluation_cosine_precision@5": 0.17160774244519061,
"eval_custom_dataset_evaluation_cosine_recall@1": 0.5950029626703536,
"eval_custom_dataset_evaluation_cosine_recall@10": 0.9201560339719533,
"eval_custom_dataset_evaluation_cosine_recall@3": 0.7980446375666601,
"eval_custom_dataset_evaluation_cosine_recall@5": 0.858038712225953,
"eval_loss": 0.20542284846305847,
"eval_runtime": 203.1105,
"eval_samples_per_second": 9.97,
"eval_steps_per_second": 0.625,
"step": 1600
},
{
"epoch": 1.678183613030602,
"grad_norm": 4.0268096923828125,
"learning_rate": 9.791590493601464e-06,
"loss": 0.1093,
"step": 1700
},
{
"epoch": 1.678183613030602,
"eval_custom_dataset_evaluation_cosine_accuracy@1": 0.6026071499111199,
"eval_custom_dataset_evaluation_cosine_accuracy@10": 0.9248469286984001,
"eval_custom_dataset_evaluation_cosine_accuracy@3": 0.8095496741062611,
"eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8681117914280071,
"eval_custom_dataset_evaluation_cosine_map@100": 0.7204789219182511,
"eval_custom_dataset_evaluation_cosine_mrr@10": 0.7169305441908291,
"eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7677587208908659,
"eval_custom_dataset_evaluation_cosine_precision@1": 0.6026071499111199,
"eval_custom_dataset_evaluation_cosine_precision@10": 0.09248469286984001,
"eval_custom_dataset_evaluation_cosine_precision@3": 0.26984989136875365,
"eval_custom_dataset_evaluation_cosine_precision@5": 0.1736223582856014,
"eval_custom_dataset_evaluation_cosine_recall@1": 0.6026071499111199,
"eval_custom_dataset_evaluation_cosine_recall@10": 0.9248469286984001,
"eval_custom_dataset_evaluation_cosine_recall@3": 0.8095496741062611,
"eval_custom_dataset_evaluation_cosine_recall@5": 0.8681117914280071,
"eval_loss": 0.1992715299129486,
"eval_runtime": 203.9449,
"eval_samples_per_second": 9.929,
"eval_steps_per_second": 0.623,
"step": 1700
},
{
"epoch": 1.7769002961500493,
"grad_norm": 0.6978006958961487,
"learning_rate": 9.060329067641682e-06,
"loss": 0.0795,
"step": 1800
},
{
"epoch": 1.7769002961500493,
"eval_custom_dataset_evaluation_cosine_accuracy@1": 0.605816709460794,
"eval_custom_dataset_evaluation_cosine_accuracy@10": 0.9286984001580091,
"eval_custom_dataset_evaluation_cosine_accuracy@3": 0.8117222990321944,
"eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8722101520837449,
"eval_custom_dataset_evaluation_cosine_map@100": 0.7237388658743508,
"eval_custom_dataset_evaluation_cosine_mrr@10": 0.7203004837460861,
"eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7712328450315743,
"eval_custom_dataset_evaluation_cosine_precision@1": 0.605816709460794,
"eval_custom_dataset_evaluation_cosine_precision@10": 0.09286984001580093,
"eval_custom_dataset_evaluation_cosine_precision@3": 0.2705740996773981,
"eval_custom_dataset_evaluation_cosine_precision@5": 0.17444203041674897,
"eval_custom_dataset_evaluation_cosine_recall@1": 0.605816709460794,
"eval_custom_dataset_evaluation_cosine_recall@10": 0.9286984001580091,
"eval_custom_dataset_evaluation_cosine_recall@3": 0.8117222990321944,
"eval_custom_dataset_evaluation_cosine_recall@5": 0.8722101520837449,
"eval_loss": 0.19426828622817993,
"eval_runtime": 204.0488,
"eval_samples_per_second": 9.924,
"eval_steps_per_second": 0.622,
"step": 1800
},
{
"epoch": 1.8756169792694966,
"grad_norm": 2.5955662727355957,
"learning_rate": 8.329067641681902e-06,
"loss": 0.1181,
"step": 1900
},
{
"epoch": 1.8756169792694966,
"eval_custom_dataset_evaluation_cosine_accuracy@1": 0.6043353742840213,
"eval_custom_dataset_evaluation_cosine_accuracy@10": 0.9297847126209757,
"eval_custom_dataset_evaluation_cosine_accuracy@3": 0.8089077621963263,
"eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8701362828362631,
"eval_custom_dataset_evaluation_cosine_map@100": 0.7219374022367849,
"eval_custom_dataset_evaluation_cosine_mrr@10": 0.7185761194661708,
"eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7700958544922684,
"eval_custom_dataset_evaluation_cosine_precision@1": 0.6043353742840213,
"eval_custom_dataset_evaluation_cosine_precision@10": 0.09297847126209759,
"eval_custom_dataset_evaluation_cosine_precision@3": 0.2696359207321088,
"eval_custom_dataset_evaluation_cosine_precision@5": 0.17402725656725263,
"eval_custom_dataset_evaluation_cosine_recall@1": 0.6043353742840213,
"eval_custom_dataset_evaluation_cosine_recall@10": 0.9297847126209757,
"eval_custom_dataset_evaluation_cosine_recall@3": 0.8089077621963263,
"eval_custom_dataset_evaluation_cosine_recall@5": 0.8701362828362631,
"eval_loss": 0.20659147202968597,
"eval_runtime": 204.0716,
"eval_samples_per_second": 9.923,
"eval_steps_per_second": 0.622,
"step": 1900
},
{
"epoch": 1.9743336623889438,
"grad_norm": 0.8803901672363281,
"learning_rate": 7.597806215722121e-06,
"loss": 0.0709,
"step": 2000
},
{
"epoch": 1.9743336623889438,
"eval_custom_dataset_evaluation_cosine_accuracy@1": 0.603199683981829,
"eval_custom_dataset_evaluation_cosine_accuracy@10": 0.9302784910132332,
"eval_custom_dataset_evaluation_cosine_accuracy@3": 0.8102903416946474,
"eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8703831720323919,
"eval_custom_dataset_evaluation_cosine_map@100": 0.7214873578820934,
"eval_custom_dataset_evaluation_cosine_mrr@10": 0.7181931668908422,
"eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7699451899898953,
"eval_custom_dataset_evaluation_cosine_precision@1": 0.603199683981829,
"eval_custom_dataset_evaluation_cosine_precision@10": 0.09302784910132333,
"eval_custom_dataset_evaluation_cosine_precision@3": 0.27009678056488245,
"eval_custom_dataset_evaluation_cosine_precision@5": 0.1740766344064784,
"eval_custom_dataset_evaluation_cosine_recall@1": 0.603199683981829,
"eval_custom_dataset_evaluation_cosine_recall@10": 0.9302784910132332,
"eval_custom_dataset_evaluation_cosine_recall@3": 0.8102903416946474,
"eval_custom_dataset_evaluation_cosine_recall@5": 0.8703831720323919,
"eval_loss": 0.19996753334999084,
"eval_runtime": 203.9747,
"eval_samples_per_second": 9.928,
"eval_steps_per_second": 0.623,
"step": 2000
},
{
"epoch": 2.073050345508391,
"grad_norm": 0.9776083827018738,
"learning_rate": 6.866544789762341e-06,
"loss": 0.0423,
"step": 2100
},
{
"epoch": 2.073050345508391,
"eval_custom_dataset_evaluation_cosine_accuracy@1": 0.6150503653960103,
"eval_custom_dataset_evaluation_cosine_accuracy@10": 0.9359075646849694,
"eval_custom_dataset_evaluation_cosine_accuracy@3": 0.8209065771281848,
"eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8796168279676081,
"eval_custom_dataset_evaluation_cosine_map@100": 0.7317976394363255,
"eval_custom_dataset_evaluation_cosine_mrr@10": 0.728681000348013,
"eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7793256141306921,
"eval_custom_dataset_evaluation_cosine_precision@1": 0.6150503653960103,
"eval_custom_dataset_evaluation_cosine_precision@10": 0.09359075646849695,
"eval_custom_dataset_evaluation_cosine_precision@3": 0.27363552570939487,
"eval_custom_dataset_evaluation_cosine_precision@5": 0.17592336559352165,
"eval_custom_dataset_evaluation_cosine_recall@1": 0.6150503653960103,
"eval_custom_dataset_evaluation_cosine_recall@10": 0.9359075646849694,
"eval_custom_dataset_evaluation_cosine_recall@3": 0.8209065771281848,
"eval_custom_dataset_evaluation_cosine_recall@5": 0.8796168279676081,
"eval_loss": 0.19277189671993256,
"eval_runtime": 203.5377,
"eval_samples_per_second": 9.949,
"eval_steps_per_second": 0.624,
"step": 2100
},
{
"epoch": 2.171767028627838,
"grad_norm": 0.3466501235961914,
"learning_rate": 6.13528336380256e-06,
"loss": 0.0365,
"step": 2200
},
{
"epoch": 2.171767028627838,
"eval_custom_dataset_evaluation_cosine_accuracy@1": 0.6160379221805253,
"eval_custom_dataset_evaluation_cosine_accuracy@10": 0.9352162749358088,
"eval_custom_dataset_evaluation_cosine_accuracy@3": 0.8210547106458621,
"eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8787280268615445,
"eval_custom_dataset_evaluation_cosine_map@100": 0.7324401240286031,
"eval_custom_dataset_evaluation_cosine_mrr@10": 0.7293587895689079,
"eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7796830803735836,
"eval_custom_dataset_evaluation_cosine_precision@1": 0.6160379221805253,
"eval_custom_dataset_evaluation_cosine_precision@10": 0.0935216274935809,
"eval_custom_dataset_evaluation_cosine_precision@3": 0.2736849035486207,
"eval_custom_dataset_evaluation_cosine_precision@5": 0.17574560537230893,
"eval_custom_dataset_evaluation_cosine_recall@1": 0.6160379221805253,
"eval_custom_dataset_evaluation_cosine_recall@10": 0.9352162749358088,
"eval_custom_dataset_evaluation_cosine_recall@3": 0.8210547106458621,
"eval_custom_dataset_evaluation_cosine_recall@5": 0.8787280268615445,
"eval_loss": 0.2016632854938507,
"eval_runtime": 203.3562,
"eval_samples_per_second": 9.958,
"eval_steps_per_second": 0.625,
"step": 2200
},
{
"epoch": 2.270483711747285,
"grad_norm": 11.319628715515137,
"learning_rate": 5.40402193784278e-06,
"loss": 0.0488,
"step": 2300
},
{
"epoch": 2.270483711747285,
"eval_custom_dataset_evaluation_cosine_accuracy@1": 0.6060635986569227,
"eval_custom_dataset_evaluation_cosine_accuracy@10": 0.9303772466916848,
"eval_custom_dataset_evaluation_cosine_accuracy@3": 0.810784120086905,
"eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8715188623345842,
"eval_custom_dataset_evaluation_cosine_map@100": 0.7233919610739875,
"eval_custom_dataset_evaluation_cosine_mrr@10": 0.7200670950782208,
"eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7713978686997449,
"eval_custom_dataset_evaluation_cosine_precision@1": 0.6060635986569227,
"eval_custom_dataset_evaluation_cosine_precision@10": 0.09303772466916849,
"eval_custom_dataset_evaluation_cosine_precision@3": 0.27026137336230166,
"eval_custom_dataset_evaluation_cosine_precision@5": 0.17430377246691683,
"eval_custom_dataset_evaluation_cosine_recall@1": 0.6060635986569227,
"eval_custom_dataset_evaluation_cosine_recall@10": 0.9303772466916848,
"eval_custom_dataset_evaluation_cosine_recall@3": 0.810784120086905,
"eval_custom_dataset_evaluation_cosine_recall@5": 0.8715188623345842,
"eval_loss": 0.2220190316438675,
"eval_runtime": 203.7981,
"eval_samples_per_second": 9.936,
"eval_steps_per_second": 0.623,
"step": 2300
},
{
"epoch": 2.3692003948667324,
"grad_norm": 1.2127763032913208,
"learning_rate": 4.672760511882998e-06,
"loss": 0.0405,
"step": 2400
},
{
"epoch": 2.3692003948667324,
"eval_custom_dataset_evaluation_cosine_accuracy@1": 0.612482717756271,
"eval_custom_dataset_evaluation_cosine_accuracy@10": 0.9345249851866483,
"eval_custom_dataset_evaluation_cosine_accuracy@3": 0.8190302192376062,
"eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8769010468101915,
"eval_custom_dataset_evaluation_cosine_map@100": 0.7299302543873705,
"eval_custom_dataset_evaluation_cosine_mrr@10": 0.7267631415592252,
"eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7775318729565346,
"eval_custom_dataset_evaluation_cosine_precision@1": 0.612482717756271,
"eval_custom_dataset_evaluation_cosine_precision@10": 0.09345249851866484,
"eval_custom_dataset_evaluation_cosine_precision@3": 0.27301007307920205,
"eval_custom_dataset_evaluation_cosine_precision@5": 0.17538020936203833,
"eval_custom_dataset_evaluation_cosine_recall@1": 0.612482717756271,
"eval_custom_dataset_evaluation_cosine_recall@10": 0.9345249851866483,
"eval_custom_dataset_evaluation_cosine_recall@3": 0.8190302192376062,
"eval_custom_dataset_evaluation_cosine_recall@5": 0.8769010468101915,
"eval_loss": 0.20903073251247406,
"eval_runtime": 204.2344,
"eval_samples_per_second": 9.915,
"eval_steps_per_second": 0.622,
"step": 2400
},
{
"epoch": 2.4679170779861797,
"grad_norm": 0.3473336398601532,
"learning_rate": 3.941499085923218e-06,
"loss": 0.0327,
"step": 2500
},
{
"epoch": 2.4679170779861797,
"eval_custom_dataset_evaluation_cosine_accuracy@1": 0.6181611692672329,
"eval_custom_dataset_evaluation_cosine_accuracy@10": 0.9379814339324511,
"eval_custom_dataset_evaluation_cosine_accuracy@3": 0.8215484890381197,
"eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8832214102310882,
"eval_custom_dataset_evaluation_cosine_map@100": 0.7347609073841157,
"eval_custom_dataset_evaluation_cosine_mrr@10": 0.7317605707764854,
"eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7821769875234544,
"eval_custom_dataset_evaluation_cosine_precision@1": 0.6181611692672329,
"eval_custom_dataset_evaluation_cosine_precision@10": 0.09379814339324512,
"eval_custom_dataset_evaluation_cosine_precision@3": 0.27384949634603983,
"eval_custom_dataset_evaluation_cosine_precision@5": 0.17664428204621765,
"eval_custom_dataset_evaluation_cosine_recall@1": 0.6181611692672329,
"eval_custom_dataset_evaluation_cosine_recall@10": 0.9379814339324511,
"eval_custom_dataset_evaluation_cosine_recall@3": 0.8215484890381197,
"eval_custom_dataset_evaluation_cosine_recall@5": 0.8832214102310882,
"eval_loss": 0.1959671527147293,
"eval_runtime": 204.2804,
"eval_samples_per_second": 9.913,
"eval_steps_per_second": 0.622,
"step": 2500
},
{
"epoch": 2.566633761105627,
"grad_norm": 3.084174871444702,
"learning_rate": 3.210237659963437e-06,
"loss": 0.0369,
"step": 2600
},
{
"epoch": 2.566633761105627,
"eval_custom_dataset_evaluation_cosine_accuracy@1": 0.6188524590163934,
"eval_custom_dataset_evaluation_cosine_accuracy@10": 0.9379814339324511,
"eval_custom_dataset_evaluation_cosine_accuracy@3": 0.8248568042662453,
"eval_custom_dataset_evaluation_cosine_accuracy@5": 0.882678253999605,
"eval_custom_dataset_evaluation_cosine_map@100": 0.7355447502017791,
"eval_custom_dataset_evaluation_cosine_mrr@10": 0.7324816047954649,
"eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7827478877040044,
"eval_custom_dataset_evaluation_cosine_precision@1": 0.6188524590163934,
"eval_custom_dataset_evaluation_cosine_precision@10": 0.09379814339324513,
"eval_custom_dataset_evaluation_cosine_precision@3": 0.2749522680887484,
"eval_custom_dataset_evaluation_cosine_precision@5": 0.17653565079992103,
"eval_custom_dataset_evaluation_cosine_recall@1": 0.6188524590163934,
"eval_custom_dataset_evaluation_cosine_recall@10": 0.9379814339324511,
"eval_custom_dataset_evaluation_cosine_recall@3": 0.8248568042662453,
"eval_custom_dataset_evaluation_cosine_recall@5": 0.882678253999605,
"eval_loss": 0.19852839410305023,
"eval_runtime": 203.3999,
"eval_samples_per_second": 9.956,
"eval_steps_per_second": 0.624,
"step": 2600
},
{
"epoch": 2.665350444225074,
"grad_norm": 0.2841149568557739,
"learning_rate": 2.4789762340036565e-06,
"loss": 0.0493,
"step": 2700
},
{
"epoch": 2.665350444225074,
"eval_custom_dataset_evaluation_cosine_accuracy@1": 0.6227039304760024,
"eval_custom_dataset_evaluation_cosine_accuracy@10": 0.9392652577523207,
"eval_custom_dataset_evaluation_cosine_accuracy@3": 0.8246099150701165,
"eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8839126999802489,
"eval_custom_dataset_evaluation_cosine_map@100": 0.7377982498848669,
"eval_custom_dataset_evaluation_cosine_mrr@10": 0.7347973078888687,
"eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7847760881684369,
"eval_custom_dataset_evaluation_cosine_precision@1": 0.6227039304760024,
"eval_custom_dataset_evaluation_cosine_precision@10": 0.0939265257752321,
"eval_custom_dataset_evaluation_cosine_precision@3": 0.2748699716900388,
"eval_custom_dataset_evaluation_cosine_precision@5": 0.1767825399960498,
"eval_custom_dataset_evaluation_cosine_recall@1": 0.6227039304760024,
"eval_custom_dataset_evaluation_cosine_recall@10": 0.9392652577523207,
"eval_custom_dataset_evaluation_cosine_recall@3": 0.8246099150701165,
"eval_custom_dataset_evaluation_cosine_recall@5": 0.8839126999802489,
"eval_loss": 0.1986970454454422,
"eval_runtime": 203.1067,
"eval_samples_per_second": 9.97,
"eval_steps_per_second": 0.625,
"step": 2700
},
{
"epoch": 2.7640671273445214,
"grad_norm": 35.21987533569336,
"learning_rate": 1.7477148080438758e-06,
"loss": 0.0466,
"step": 2800
},
{
"epoch": 2.7640671273445214,
"eval_custom_dataset_evaluation_cosine_accuracy@1": 0.6232964645467114,
"eval_custom_dataset_evaluation_cosine_accuracy@10": 0.9393640134307722,
"eval_custom_dataset_evaluation_cosine_accuracy@3": 0.8262887616037922,
"eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8854434129962473,
"eval_custom_dataset_evaluation_cosine_map@100": 0.7388327302504635,
"eval_custom_dataset_evaluation_cosine_mrr@10": 0.7358363978944812,
"eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7856277834747031,
"eval_custom_dataset_evaluation_cosine_precision@1": 0.6232964645467114,
"eval_custom_dataset_evaluation_cosine_precision@10": 0.09393640134307724,
"eval_custom_dataset_evaluation_cosine_precision@3": 0.2754295872012641,
"eval_custom_dataset_evaluation_cosine_precision@5": 0.17708868259924945,
"eval_custom_dataset_evaluation_cosine_recall@1": 0.6232964645467114,
"eval_custom_dataset_evaluation_cosine_recall@10": 0.9393640134307722,
"eval_custom_dataset_evaluation_cosine_recall@3": 0.8262887616037922,
"eval_custom_dataset_evaluation_cosine_recall@5": 0.8854434129962473,
"eval_loss": 0.20075508952140808,
"eval_runtime": 203.5383,
"eval_samples_per_second": 9.949,
"eval_steps_per_second": 0.624,
"step": 2800
},
{
"epoch": 2.8627838104639682,
"grad_norm": 1.2376320362091064,
"learning_rate": 1.0164533820840951e-06,
"loss": 0.03,
"step": 2900
},
{
"epoch": 2.8627838104639682,
"eval_custom_dataset_evaluation_cosine_accuracy@1": 0.6220620185660676,
"eval_custom_dataset_evaluation_cosine_accuracy@10": 0.9396109026269011,
"eval_custom_dataset_evaluation_cosine_accuracy@3": 0.8248568042662453,
"eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8844558562117322,
"eval_custom_dataset_evaluation_cosine_map@100": 0.7376002793094979,
"eval_custom_dataset_evaluation_cosine_mrr@10": 0.7346429237637674,
"eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7847496074277119,
"eval_custom_dataset_evaluation_cosine_precision@1": 0.6220620185660676,
"eval_custom_dataset_evaluation_cosine_precision@10": 0.09396109026269013,
"eval_custom_dataset_evaluation_cosine_precision@3": 0.2749522680887484,
"eval_custom_dataset_evaluation_cosine_precision@5": 0.17689117124234646,
"eval_custom_dataset_evaluation_cosine_recall@1": 0.6220620185660676,
"eval_custom_dataset_evaluation_cosine_recall@10": 0.9396109026269011,
"eval_custom_dataset_evaluation_cosine_recall@3": 0.8248568042662453,
"eval_custom_dataset_evaluation_cosine_recall@5": 0.8844558562117322,
"eval_loss": 0.2035462111234665,
"eval_runtime": 202.781,
"eval_samples_per_second": 9.986,
"eval_steps_per_second": 0.626,
"step": 2900
},
{
"epoch": 2.9615004935834155,
"grad_norm": 0.5317863821983337,
"learning_rate": 2.8519195612431445e-07,
"loss": 0.0633,
"step": 3000
},
{
"epoch": 2.9615004935834155,
"eval_custom_dataset_evaluation_cosine_accuracy@1": 0.621963262887616,
"eval_custom_dataset_evaluation_cosine_accuracy@10": 0.9401046810191586,
"eval_custom_dataset_evaluation_cosine_accuracy@3": 0.8262393837645665,
"eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8843077226940549,
"eval_custom_dataset_evaluation_cosine_map@100": 0.7376242387041089,
"eval_custom_dataset_evaluation_cosine_mrr@10": 0.7346985522104195,
"eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7849111131501391,
"eval_custom_dataset_evaluation_cosine_precision@1": 0.621963262887616,
"eval_custom_dataset_evaluation_cosine_precision@10": 0.09401046810191586,
"eval_custom_dataset_evaluation_cosine_precision@3": 0.2754131279215221,
"eval_custom_dataset_evaluation_cosine_precision@5": 0.17686154453881098,
"eval_custom_dataset_evaluation_cosine_recall@1": 0.621963262887616,
"eval_custom_dataset_evaluation_cosine_recall@10": 0.9401046810191586,
"eval_custom_dataset_evaluation_cosine_recall@3": 0.8262393837645665,
"eval_custom_dataset_evaluation_cosine_recall@5": 0.8843077226940549,
"eval_loss": 0.20363624393939972,
"eval_runtime": 203.946,
"eval_samples_per_second": 9.929,
"eval_steps_per_second": 0.623,
"step": 3000
}
],
"logging_steps": 100,
"max_steps": 3039,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}