Training in progress, step 6000, checkpoint
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:3fab20f32621c18f7e567e201b068fd4e502966a6cf5803e5436f2004a34e1fe
 size 685354800
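The file above is a Git LFS pointer, not the weights themselves: it records the sha256 and byte size of the real adapter_model.safetensors, and `git lfs pull` fetches the actual 685354800-byte file. A minimal sketch for checking a locally fetched copy against the new pointer (the path is an assumption about your local checkout):

```python
import hashlib

# Sketch: verify a locally fetched adapter_model.safetensors against the
# LFS pointer above (the path is an assumption about your local checkout).
EXPECTED_OID = "3fab20f32621c18f7e567e201b068fd4e502966a6cf5803e5436f2004a34e1fe"
EXPECTED_SIZE = 685354800

def matches_pointer(path: str) -> bool:
    digest = hashlib.sha256()
    size = 0
    with open(path, "rb") as f:
        # Hash in 1 MiB chunks so large checkpoints don't need to fit in memory.
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
            size += len(chunk)
    return size == EXPECTED_SIZE and digest.hexdigest() == EXPECTED_OID

print(matches_pointer("last-checkpoint/adapter_model.safetensors"))
```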
last-checkpoint/trainer_state.json
CHANGED
@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 4.
+  "epoch": 4.930156121610517,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 6000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
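The updated header is consistent with the step counter. A quick sanity check, assuming the epoch field is global_step divided by optimizer steps per epoch (an inference from the log entries below, not a value logged anywhere in this state file):

```python
# Consecutive log entries in this diff advance "epoch" by ~0.00082169,
# i.e. about 1/1217, suggesting ~1217 optimizer steps per epoch in this run.
steps_per_epoch = round(1 / (4.849630238290879 - 4.848808545603944))
print(steps_per_epoch)         # 1217
print(6000 / steps_per_epoch)  # ~4.9302, matching "epoch": 4.930156121610517
```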
@@ -41308,6 +41308,706 @@
       "learning_rate": 0.0005,
       "loss": 1.3641,
       "step": 5900
|
|
41308 |
"learning_rate": 0.0005,
|
41309 |
"loss": 1.3641,
|
41310 |
"step": 5900
|
41311 |
+
},
|
41312 |
+
{
|
41313 |
+
"epoch": 4.848808545603944,
|
41314 |
+
"grad_norm": 0.5559499859809875,
|
41315 |
+
"learning_rate": 0.0005,
|
41316 |
+
"loss": 1.3687,
|
41317 |
+
"step": 5901
|
41318 |
+
},
|
41319 |
+
{
|
41320 |
+
"epoch": 4.849630238290879,
|
41321 |
+
"grad_norm": 0.535542905330658,
|
41322 |
+
"learning_rate": 0.0005,
|
41323 |
+
"loss": 1.2836,
|
41324 |
+
"step": 5902
|
41325 |
+
},
|
41326 |
+
{
|
41327 |
+
"epoch": 4.8504519309778145,
|
41328 |
+
"grad_norm": 0.5417159199714661,
|
41329 |
+
"learning_rate": 0.0005,
|
41330 |
+
"loss": 1.3408,
|
41331 |
+
"step": 5903
|
41332 |
+
},
|
41333 |
+
{
|
41334 |
+
"epoch": 4.851273623664749,
|
41335 |
+
"grad_norm": 0.543630838394165,
|
41336 |
+
"learning_rate": 0.0005,
|
41337 |
+
"loss": 1.3996,
|
41338 |
+
"step": 5904
|
41339 |
+
},
|
41340 |
+
{
|
41341 |
+
"epoch": 4.852095316351685,
|
41342 |
+
"grad_norm": 0.5195862054824829,
|
41343 |
+
"learning_rate": 0.0005,
|
41344 |
+
"loss": 1.3169,
|
41345 |
+
"step": 5905
|
41346 |
+
},
|
41347 |
+
{
|
41348 |
+
"epoch": 4.852917009038619,
|
41349 |
+
"grad_norm": 0.5713696479797363,
|
41350 |
+
"learning_rate": 0.0005,
|
41351 |
+
"loss": 1.4069,
|
41352 |
+
"step": 5906
|
41353 |
+
},
|
41354 |
+
{
|
41355 |
+
"epoch": 4.853738701725555,
|
41356 |
+
"grad_norm": 0.5813581943511963,
|
41357 |
+
"learning_rate": 0.0005,
|
41358 |
+
"loss": 1.3037,
|
41359 |
+
"step": 5907
|
41360 |
+
},
|
41361 |
+
{
|
41362 |
+
"epoch": 4.8545603944124895,
|
41363 |
+
"grad_norm": 0.5637884140014648,
|
41364 |
+
"learning_rate": 0.0005,
|
41365 |
+
"loss": 1.3991,
|
41366 |
+
"step": 5908
|
41367 |
+
},
|
41368 |
+
{
|
41369 |
+
"epoch": 4.855382087099425,
|
41370 |
+
"grad_norm": 0.5755516290664673,
|
41371 |
+
"learning_rate": 0.0005,
|
41372 |
+
"loss": 1.4464,
|
41373 |
+
"step": 5909
|
41374 |
+
},
|
41375 |
+
{
|
41376 |
+
"epoch": 4.85620377978636,
|
41377 |
+
"grad_norm": 0.5279106497764587,
|
41378 |
+
"learning_rate": 0.0005,
|
41379 |
+
"loss": 1.3729,
|
41380 |
+
"step": 5910
|
41381 |
+
},
|
41382 |
+
{
|
41383 |
+
"epoch": 4.857025472473295,
|
41384 |
+
"grad_norm": 0.5568866729736328,
|
41385 |
+
"learning_rate": 0.0005,
|
41386 |
+
"loss": 1.3846,
|
41387 |
+
"step": 5911
|
41388 |
+
},
|
41389 |
+
{
|
41390 |
+
"epoch": 4.85784716516023,
|
41391 |
+
"grad_norm": 0.5258705615997314,
|
41392 |
+
"learning_rate": 0.0005,
|
41393 |
+
"loss": 1.3301,
|
41394 |
+
"step": 5912
|
41395 |
+
},
|
41396 |
+
{
|
41397 |
+
"epoch": 4.8586688578471655,
|
41398 |
+
"grad_norm": 0.5256040692329407,
|
41399 |
+
"learning_rate": 0.0005,
|
41400 |
+
"loss": 1.3543,
|
41401 |
+
"step": 5913
|
41402 |
+
},
|
41403 |
+
{
|
41404 |
+
"epoch": 4.8594905505341,
|
41405 |
+
"grad_norm": 0.5508636236190796,
|
41406 |
+
"learning_rate": 0.0005,
|
41407 |
+
"loss": 1.3168,
|
41408 |
+
"step": 5914
|
41409 |
+
},
|
41410 |
+
{
|
41411 |
+
"epoch": 4.860312243221035,
|
41412 |
+
"grad_norm": 0.5376938581466675,
|
41413 |
+
"learning_rate": 0.0005,
|
41414 |
+
"loss": 1.3618,
|
41415 |
+
"step": 5915
|
41416 |
+
},
|
41417 |
+
{
|
41418 |
+
"epoch": 4.86113393590797,
|
41419 |
+
"grad_norm": 0.513633668422699,
|
41420 |
+
"learning_rate": 0.0005,
|
41421 |
+
"loss": 1.279,
|
41422 |
+
"step": 5916
|
41423 |
+
},
|
41424 |
+
{
|
41425 |
+
"epoch": 4.861955628594906,
|
41426 |
+
"grad_norm": 0.5526177287101746,
|
41427 |
+
"learning_rate": 0.0005,
|
41428 |
+
"loss": 1.2778,
|
41429 |
+
"step": 5917
|
41430 |
+
},
|
41431 |
+
{
|
41432 |
+
"epoch": 4.862777321281841,
|
41433 |
+
"grad_norm": 0.5580543279647827,
|
41434 |
+
"learning_rate": 0.0005,
|
41435 |
+
"loss": 1.3931,
|
41436 |
+
"step": 5918
|
41437 |
+
},
|
41438 |
+
{
|
41439 |
+
"epoch": 4.863599013968775,
|
41440 |
+
"grad_norm": 0.5640444159507751,
|
41441 |
+
"learning_rate": 0.0005,
|
41442 |
+
"loss": 1.4172,
|
41443 |
+
"step": 5919
|
41444 |
+
},
|
41445 |
+
{
|
41446 |
+
"epoch": 4.864420706655711,
|
41447 |
+
"grad_norm": 0.5366266369819641,
|
41448 |
+
"learning_rate": 0.0005,
|
41449 |
+
"loss": 1.2472,
|
41450 |
+
"step": 5920
|
41451 |
+
},
|
41452 |
+
{
|
41453 |
+
"epoch": 4.8652423993426455,
|
41454 |
+
"grad_norm": 0.5291760563850403,
|
41455 |
+
"learning_rate": 0.0005,
|
41456 |
+
"loss": 1.2402,
|
41457 |
+
"step": 5921
|
41458 |
+
},
|
41459 |
+
{
|
41460 |
+
"epoch": 4.866064092029581,
|
41461 |
+
"grad_norm": 0.5365450978279114,
|
41462 |
+
"learning_rate": 0.0005,
|
41463 |
+
"loss": 1.2807,
|
41464 |
+
"step": 5922
|
41465 |
+
},
|
41466 |
+
{
|
41467 |
+
"epoch": 4.866885784716516,
|
41468 |
+
"grad_norm": 0.5284454822540283,
|
41469 |
+
"learning_rate": 0.0005,
|
41470 |
+
"loss": 1.4549,
|
41471 |
+
"step": 5923
|
41472 |
+
},
|
41473 |
+
{
|
41474 |
+
"epoch": 4.867707477403451,
|
41475 |
+
"grad_norm": 0.541029155254364,
|
41476 |
+
"learning_rate": 0.0005,
|
41477 |
+
"loss": 1.3154,
|
41478 |
+
"step": 5924
|
41479 |
+
},
|
41480 |
+
{
|
41481 |
+
"epoch": 4.868529170090386,
|
41482 |
+
"grad_norm": 0.5600323677062988,
|
41483 |
+
"learning_rate": 0.0005,
|
41484 |
+
"loss": 1.3268,
|
41485 |
+
"step": 5925
|
41486 |
+
},
|
41487 |
+
{
|
41488 |
+
"epoch": 4.869350862777321,
|
41489 |
+
"grad_norm": 0.5529478788375854,
|
41490 |
+
"learning_rate": 0.0005,
|
41491 |
+
"loss": 1.3629,
|
41492 |
+
"step": 5926
|
41493 |
+
},
|
41494 |
+
{
|
41495 |
+
"epoch": 4.870172555464256,
|
41496 |
+
"grad_norm": 0.5471394658088684,
|
41497 |
+
"learning_rate": 0.0005,
|
41498 |
+
"loss": 1.2708,
|
41499 |
+
"step": 5927
|
41500 |
+
},
|
41501 |
+
{
|
41502 |
+
"epoch": 4.870994248151192,
|
41503 |
+
"grad_norm": 0.5359401702880859,
|
41504 |
+
"learning_rate": 0.0005,
|
41505 |
+
"loss": 1.3363,
|
41506 |
+
"step": 5928
|
41507 |
+
},
|
41508 |
+
{
|
41509 |
+
"epoch": 4.871815940838126,
|
41510 |
+
"grad_norm": 0.5148870348930359,
|
41511 |
+
"learning_rate": 0.0005,
|
41512 |
+
"loss": 1.2534,
|
41513 |
+
"step": 5929
|
41514 |
+
},
|
41515 |
+
{
|
41516 |
+
"epoch": 4.872637633525062,
|
41517 |
+
"grad_norm": 0.5293238759040833,
|
41518 |
+
"learning_rate": 0.0005,
|
41519 |
+
"loss": 1.3152,
|
41520 |
+
"step": 5930
|
41521 |
+
},
|
41522 |
+
{
|
41523 |
+
"epoch": 4.8734593262119965,
|
41524 |
+
"grad_norm": 0.5174310207366943,
|
41525 |
+
"learning_rate": 0.0005,
|
41526 |
+
"loss": 1.3534,
|
41527 |
+
"step": 5931
|
41528 |
+
},
|
41529 |
+
{
|
41530 |
+
"epoch": 4.874281018898932,
|
41531 |
+
"grad_norm": 0.5341978073120117,
|
41532 |
+
"learning_rate": 0.0005,
|
41533 |
+
"loss": 1.3888,
|
41534 |
+
"step": 5932
|
41535 |
+
},
|
41536 |
+
{
|
41537 |
+
"epoch": 4.875102711585867,
|
41538 |
+
"grad_norm": 0.5729701519012451,
|
41539 |
+
"learning_rate": 0.0005,
|
41540 |
+
"loss": 1.4173,
|
41541 |
+
"step": 5933
|
41542 |
+
},
|
41543 |
+
{
|
41544 |
+
"epoch": 4.875924404272802,
|
41545 |
+
"grad_norm": 0.6612470746040344,
|
41546 |
+
"learning_rate": 0.0005,
|
41547 |
+
"loss": 1.3927,
|
41548 |
+
"step": 5934
|
41549 |
+
},
|
41550 |
+
{
|
41551 |
+
"epoch": 4.876746096959737,
|
41552 |
+
"grad_norm": 0.5212023258209229,
|
41553 |
+
"learning_rate": 0.0005,
|
41554 |
+
"loss": 1.3259,
|
41555 |
+
"step": 5935
|
41556 |
+
},
|
41557 |
+
{
|
41558 |
+
"epoch": 4.8775677896466725,
|
41559 |
+
"grad_norm": 0.5570096969604492,
|
41560 |
+
"learning_rate": 0.0005,
|
41561 |
+
"loss": 1.2884,
|
41562 |
+
"step": 5936
|
41563 |
+
},
|
41564 |
+
{
|
41565 |
+
"epoch": 4.878389482333607,
|
41566 |
+
"grad_norm": 0.5613424181938171,
|
41567 |
+
"learning_rate": 0.0005,
|
41568 |
+
"loss": 1.2463,
|
41569 |
+
"step": 5937
|
41570 |
+
},
|
41571 |
+
{
|
41572 |
+
"epoch": 4.879211175020543,
|
41573 |
+
"grad_norm": 0.5613168478012085,
|
41574 |
+
"learning_rate": 0.0005,
|
41575 |
+
"loss": 1.3115,
|
41576 |
+
"step": 5938
|
41577 |
+
},
|
41578 |
+
{
|
41579 |
+
"epoch": 4.880032867707477,
|
41580 |
+
"grad_norm": 0.535275936126709,
|
41581 |
+
"learning_rate": 0.0005,
|
41582 |
+
"loss": 1.3002,
|
41583 |
+
"step": 5939
|
41584 |
+
},
|
41585 |
+
{
|
41586 |
+
"epoch": 4.880854560394413,
|
41587 |
+
"grad_norm": 0.5672900676727295,
|
41588 |
+
"learning_rate": 0.0005,
|
41589 |
+
"loss": 1.3669,
|
41590 |
+
"step": 5940
|
41591 |
+
},
|
41592 |
+
{
|
41593 |
+
"epoch": 4.881676253081348,
|
41594 |
+
"grad_norm": 0.5469388961791992,
|
41595 |
+
"learning_rate": 0.0005,
|
41596 |
+
"loss": 1.3652,
|
41597 |
+
"step": 5941
|
41598 |
+
},
|
41599 |
+
{
|
41600 |
+
"epoch": 4.882497945768282,
|
41601 |
+
"grad_norm": 0.529625415802002,
|
41602 |
+
"learning_rate": 0.0005,
|
41603 |
+
"loss": 1.4422,
|
41604 |
+
"step": 5942
|
41605 |
+
},
|
41606 |
+
{
|
41607 |
+
"epoch": 4.883319638455218,
|
41608 |
+
"grad_norm": 0.5889802575111389,
|
41609 |
+
"learning_rate": 0.0005,
|
41610 |
+
"loss": 1.3788,
|
41611 |
+
"step": 5943
|
41612 |
+
},
|
41613 |
+
{
|
41614 |
+
"epoch": 4.884141331142153,
|
41615 |
+
"grad_norm": 0.5382450819015503,
|
41616 |
+
"learning_rate": 0.0005,
|
41617 |
+
"loss": 1.3318,
|
41618 |
+
"step": 5944
|
41619 |
+
},
|
41620 |
+
{
|
41621 |
+
"epoch": 4.884963023829088,
|
41622 |
+
"grad_norm": 0.5242584347724915,
|
41623 |
+
"learning_rate": 0.0005,
|
41624 |
+
"loss": 1.268,
|
41625 |
+
"step": 5945
|
41626 |
+
},
|
41627 |
+
{
|
41628 |
+
"epoch": 4.885784716516023,
|
41629 |
+
"grad_norm": 0.5483070611953735,
|
41630 |
+
"learning_rate": 0.0005,
|
41631 |
+
"loss": 1.4016,
|
41632 |
+
"step": 5946
|
41633 |
+
},
|
41634 |
+
{
|
41635 |
+
"epoch": 4.886606409202958,
|
41636 |
+
"grad_norm": 0.5779204368591309,
|
41637 |
+
"learning_rate": 0.0005,
|
41638 |
+
"loss": 1.327,
|
41639 |
+
"step": 5947
|
41640 |
+
},
|
41641 |
+
{
|
41642 |
+
"epoch": 4.887428101889893,
|
41643 |
+
"grad_norm": 0.5551186800003052,
|
41644 |
+
"learning_rate": 0.0005,
|
41645 |
+
"loss": 1.4228,
|
41646 |
+
"step": 5948
|
41647 |
+
},
|
41648 |
+
{
|
41649 |
+
"epoch": 4.888249794576828,
|
41650 |
+
"grad_norm": 0.5995270609855652,
|
41651 |
+
"learning_rate": 0.0005,
|
41652 |
+
"loss": 1.3831,
|
41653 |
+
"step": 5949
|
41654 |
+
},
|
41655 |
+
{
|
41656 |
+
"epoch": 4.889071487263763,
|
41657 |
+
"grad_norm": 0.5424328446388245,
|
41658 |
+
"learning_rate": 0.0005,
|
41659 |
+
"loss": 1.3815,
|
41660 |
+
"step": 5950
|
41661 |
+
},
|
41662 |
+
{
|
41663 |
+
"epoch": 4.889893179950699,
|
41664 |
+
"grad_norm": 0.5349864959716797,
|
41665 |
+
"learning_rate": 0.0005,
|
41666 |
+
"loss": 1.2749,
|
41667 |
+
"step": 5951
|
41668 |
+
},
|
41669 |
+
{
|
41670 |
+
"epoch": 4.890714872637633,
|
41671 |
+
"grad_norm": 0.5398481488227844,
|
41672 |
+
"learning_rate": 0.0005,
|
41673 |
+
"loss": 1.3635,
|
41674 |
+
"step": 5952
|
41675 |
+
},
|
41676 |
+
{
|
41677 |
+
"epoch": 4.891536565324569,
|
41678 |
+
"grad_norm": 0.5872131586074829,
|
41679 |
+
"learning_rate": 0.0005,
|
41680 |
+
"loss": 1.3778,
|
41681 |
+
"step": 5953
|
41682 |
+
},
|
41683 |
+
{
|
41684 |
+
"epoch": 4.8923582580115035,
|
41685 |
+
"grad_norm": 0.5435046553611755,
|
41686 |
+
"learning_rate": 0.0005,
|
41687 |
+
"loss": 1.3834,
|
41688 |
+
"step": 5954
|
41689 |
+
},
|
41690 |
+
{
|
41691 |
+
"epoch": 4.893179950698439,
|
41692 |
+
"grad_norm": 0.551789402961731,
|
41693 |
+
"learning_rate": 0.0005,
|
41694 |
+
"loss": 1.3315,
|
41695 |
+
"step": 5955
|
41696 |
+
},
|
41697 |
+
{
|
41698 |
+
"epoch": 4.894001643385374,
|
41699 |
+
"grad_norm": 0.5277841687202454,
|
41700 |
+
"learning_rate": 0.0005,
|
41701 |
+
"loss": 1.4072,
|
41702 |
+
"step": 5956
|
41703 |
+
},
|
41704 |
+
{
|
41705 |
+
"epoch": 4.894823336072309,
|
41706 |
+
"grad_norm": 0.5183998346328735,
|
41707 |
+
"learning_rate": 0.0005,
|
41708 |
+
"loss": 1.2465,
|
41709 |
+
"step": 5957
|
41710 |
+
},
|
41711 |
+
{
|
41712 |
+
"epoch": 4.895645028759244,
|
41713 |
+
"grad_norm": 0.561173677444458,
|
41714 |
+
"learning_rate": 0.0005,
|
41715 |
+
"loss": 1.4095,
|
41716 |
+
"step": 5958
|
41717 |
+
},
|
41718 |
+
{
|
41719 |
+
"epoch": 4.8964667214461794,
|
41720 |
+
"grad_norm": 0.5499217510223389,
|
41721 |
+
"learning_rate": 0.0005,
|
41722 |
+
"loss": 1.312,
|
41723 |
+
"step": 5959
|
41724 |
+
},
|
41725 |
+
{
|
41726 |
+
"epoch": 4.897288414133114,
|
41727 |
+
"grad_norm": 0.534344494342804,
|
41728 |
+
"learning_rate": 0.0005,
|
41729 |
+
"loss": 1.326,
|
41730 |
+
"step": 5960
|
41731 |
+
},
|
41732 |
+
{
|
41733 |
+
"epoch": 4.89811010682005,
|
41734 |
+
"grad_norm": 0.5524152517318726,
|
41735 |
+
"learning_rate": 0.0005,
|
41736 |
+
"loss": 1.3709,
|
41737 |
+
"step": 5961
|
41738 |
+
},
|
41739 |
+
{
|
41740 |
+
"epoch": 4.898931799506984,
|
41741 |
+
"grad_norm": 0.5235154032707214,
|
41742 |
+
"learning_rate": 0.0005,
|
41743 |
+
"loss": 1.212,
|
41744 |
+
"step": 5962
|
41745 |
+
},
|
41746 |
+
{
|
41747 |
+
"epoch": 4.89975349219392,
|
41748 |
+
"grad_norm": 0.5188632607460022,
|
41749 |
+
"learning_rate": 0.0005,
|
41750 |
+
"loss": 1.2143,
|
41751 |
+
"step": 5963
|
41752 |
+
},
|
41753 |
+
{
|
41754 |
+
"epoch": 4.9005751848808545,
|
41755 |
+
"grad_norm": 0.5360555648803711,
|
41756 |
+
"learning_rate": 0.0005,
|
41757 |
+
"loss": 1.4402,
|
41758 |
+
"step": 5964
|
41759 |
+
},
|
41760 |
+
{
|
41761 |
+
"epoch": 4.901396877567789,
|
41762 |
+
"grad_norm": 0.5215834379196167,
|
41763 |
+
"learning_rate": 0.0005,
|
41764 |
+
"loss": 1.2509,
|
41765 |
+
"step": 5965
|
41766 |
+
},
|
41767 |
+
{
|
41768 |
+
"epoch": 4.902218570254725,
|
41769 |
+
"grad_norm": 0.526918888092041,
|
41770 |
+
"learning_rate": 0.0005,
|
41771 |
+
"loss": 1.2802,
|
41772 |
+
"step": 5966
|
41773 |
+
},
|
41774 |
+
{
|
41775 |
+
"epoch": 4.90304026294166,
|
41776 |
+
"grad_norm": 0.5387428402900696,
|
41777 |
+
"learning_rate": 0.0005,
|
41778 |
+
"loss": 1.294,
|
41779 |
+
"step": 5967
|
41780 |
+
},
|
41781 |
+
{
|
41782 |
+
"epoch": 4.903861955628595,
|
41783 |
+
"grad_norm": 0.5555245280265808,
|
41784 |
+
"learning_rate": 0.0005,
|
41785 |
+
"loss": 1.4213,
|
41786 |
+
"step": 5968
|
41787 |
+
},
|
41788 |
+
{
|
41789 |
+
"epoch": 4.90468364831553,
|
41790 |
+
"grad_norm": 0.5404963493347168,
|
41791 |
+
"learning_rate": 0.0005,
|
41792 |
+
"loss": 1.227,
|
41793 |
+
"step": 5969
|
41794 |
+
},
|
41795 |
+
{
|
41796 |
+
"epoch": 4.905505341002465,
|
41797 |
+
"grad_norm": 0.5623646974563599,
|
41798 |
+
"learning_rate": 0.0005,
|
41799 |
+
"loss": 1.4319,
|
41800 |
+
"step": 5970
|
41801 |
+
},
|
41802 |
+
{
|
41803 |
+
"epoch": 4.9063270336894,
|
41804 |
+
"grad_norm": 0.5389553904533386,
|
41805 |
+
"learning_rate": 0.0005,
|
41806 |
+
"loss": 1.297,
|
41807 |
+
"step": 5971
|
41808 |
+
},
|
41809 |
+
{
|
41810 |
+
"epoch": 4.907148726376335,
|
41811 |
+
"grad_norm": 0.5477744936943054,
|
41812 |
+
"learning_rate": 0.0005,
|
41813 |
+
"loss": 1.3659,
|
41814 |
+
"step": 5972
|
41815 |
+
},
|
41816 |
+
{
|
41817 |
+
"epoch": 4.90797041906327,
|
41818 |
+
"grad_norm": 0.5301917791366577,
|
41819 |
+
"learning_rate": 0.0005,
|
41820 |
+
"loss": 1.2658,
|
41821 |
+
"step": 5973
|
41822 |
+
},
|
41823 |
+
{
|
41824 |
+
"epoch": 4.908792111750206,
|
41825 |
+
"grad_norm": 0.5291617512702942,
|
41826 |
+
"learning_rate": 0.0005,
|
41827 |
+
"loss": 1.3592,
|
41828 |
+
"step": 5974
|
41829 |
+
},
|
41830 |
+
{
|
41831 |
+
"epoch": 4.90961380443714,
|
41832 |
+
"grad_norm": 0.5419930815696716,
|
41833 |
+
"learning_rate": 0.0005,
|
41834 |
+
"loss": 1.3125,
|
41835 |
+
"step": 5975
|
41836 |
+
},
|
41837 |
+
{
|
41838 |
+
"epoch": 4.910435497124076,
|
41839 |
+
"grad_norm": 0.5221468210220337,
|
41840 |
+
"learning_rate": 0.0005,
|
41841 |
+
"loss": 1.3643,
|
41842 |
+
"step": 5976
|
41843 |
+
},
|
41844 |
+
{
|
41845 |
+
"epoch": 4.9112571898110104,
|
41846 |
+
"grad_norm": 0.5470991134643555,
|
41847 |
+
"learning_rate": 0.0005,
|
41848 |
+
"loss": 1.3461,
|
41849 |
+
"step": 5977
|
41850 |
+
},
|
41851 |
+
{
|
41852 |
+
"epoch": 4.912078882497946,
|
41853 |
+
"grad_norm": 0.5354620814323425,
|
41854 |
+
"learning_rate": 0.0005,
|
41855 |
+
"loss": 1.2681,
|
41856 |
+
"step": 5978
|
41857 |
+
},
|
41858 |
+
{
|
41859 |
+
"epoch": 4.912900575184881,
|
41860 |
+
"grad_norm": 0.5851417779922485,
|
41861 |
+
"learning_rate": 0.0005,
|
41862 |
+
"loss": 1.3237,
|
41863 |
+
"step": 5979
|
41864 |
+
},
|
41865 |
+
{
|
41866 |
+
"epoch": 4.913722267871816,
|
41867 |
+
"grad_norm": 0.5492926239967346,
|
41868 |
+
"learning_rate": 0.0005,
|
41869 |
+
"loss": 1.417,
|
41870 |
+
"step": 5980
|
41871 |
+
},
|
41872 |
+
{
|
41873 |
+
"epoch": 4.914543960558751,
|
41874 |
+
"grad_norm": 0.5446394681930542,
|
41875 |
+
"learning_rate": 0.0005,
|
41876 |
+
"loss": 1.275,
|
41877 |
+
"step": 5981
|
41878 |
+
},
|
41879 |
+
{
|
41880 |
+
"epoch": 4.915365653245686,
|
41881 |
+
"grad_norm": 0.5484851002693176,
|
41882 |
+
"learning_rate": 0.0005,
|
41883 |
+
"loss": 1.3842,
|
41884 |
+
"step": 5982
|
41885 |
+
},
|
41886 |
+
{
|
41887 |
+
"epoch": 4.916187345932621,
|
41888 |
+
"grad_norm": 0.5432127714157104,
|
41889 |
+
"learning_rate": 0.0005,
|
41890 |
+
"loss": 1.4247,
|
41891 |
+
"step": 5983
|
41892 |
+
},
|
41893 |
+
{
|
41894 |
+
"epoch": 4.917009038619557,
|
41895 |
+
"grad_norm": 0.5324352979660034,
|
41896 |
+
"learning_rate": 0.0005,
|
41897 |
+
"loss": 1.3269,
|
41898 |
+
"step": 5984
|
41899 |
+
},
|
41900 |
+
{
|
41901 |
+
"epoch": 4.917830731306491,
|
41902 |
+
"grad_norm": 0.5508584976196289,
|
41903 |
+
"learning_rate": 0.0005,
|
41904 |
+
"loss": 1.1933,
|
41905 |
+
"step": 5985
|
41906 |
+
},
|
41907 |
+
{
|
41908 |
+
"epoch": 4.918652423993427,
|
41909 |
+
"grad_norm": 0.5484975576400757,
|
41910 |
+
"learning_rate": 0.0005,
|
41911 |
+
"loss": 1.2294,
|
41912 |
+
"step": 5986
|
41913 |
+
},
|
41914 |
+
{
|
41915 |
+
"epoch": 4.9194741166803615,
|
41916 |
+
"grad_norm": 0.5371730923652649,
|
41917 |
+
"learning_rate": 0.0005,
|
41918 |
+
"loss": 1.293,
|
41919 |
+
"step": 5987
|
41920 |
+
},
|
41921 |
+
{
|
41922 |
+
"epoch": 4.920295809367296,
|
41923 |
+
"grad_norm": 0.5321599841117859,
|
41924 |
+
"learning_rate": 0.0005,
|
41925 |
+
"loss": 1.3553,
|
41926 |
+
"step": 5988
|
41927 |
+
},
|
41928 |
+
{
|
41929 |
+
"epoch": 4.921117502054232,
|
41930 |
+
"grad_norm": 0.5347539186477661,
|
41931 |
+
"learning_rate": 0.0005,
|
41932 |
+
"loss": 1.3904,
|
41933 |
+
"step": 5989
|
41934 |
+
},
|
41935 |
+
{
|
41936 |
+
"epoch": 4.921939194741167,
|
41937 |
+
"grad_norm": 0.5540315508842468,
|
41938 |
+
"learning_rate": 0.0005,
|
41939 |
+
"loss": 1.3358,
|
41940 |
+
"step": 5990
|
41941 |
+
},
|
41942 |
+
{
|
41943 |
+
"epoch": 4.922760887428102,
|
41944 |
+
"grad_norm": 0.543171226978302,
|
41945 |
+
"learning_rate": 0.0005,
|
41946 |
+
"loss": 1.3914,
|
41947 |
+
"step": 5991
|
41948 |
+
},
|
41949 |
+
{
|
41950 |
+
"epoch": 4.923582580115037,
|
41951 |
+
"grad_norm": 0.5221793055534363,
|
41952 |
+
"learning_rate": 0.0005,
|
41953 |
+
"loss": 1.3852,
|
41954 |
+
"step": 5992
|
41955 |
+
},
|
41956 |
+
{
|
41957 |
+
"epoch": 4.924404272801972,
|
41958 |
+
"grad_norm": 0.5648449659347534,
|
41959 |
+
"learning_rate": 0.0005,
|
41960 |
+
"loss": 1.3695,
|
41961 |
+
"step": 5993
|
41962 |
+
},
|
41963 |
+
{
|
41964 |
+
"epoch": 4.925225965488907,
|
41965 |
+
"grad_norm": 0.5360020995140076,
|
41966 |
+
"learning_rate": 0.0005,
|
41967 |
+
"loss": 1.296,
|
41968 |
+
"step": 5994
|
41969 |
+
},
|
41970 |
+
{
|
41971 |
+
"epoch": 4.926047658175842,
|
41972 |
+
"grad_norm": 0.5223022699356079,
|
41973 |
+
"learning_rate": 0.0005,
|
41974 |
+
"loss": 1.2654,
|
41975 |
+
"step": 5995
|
41976 |
+
},
|
41977 |
+
{
|
41978 |
+
"epoch": 4.926869350862777,
|
41979 |
+
"grad_norm": 0.5244916677474976,
|
41980 |
+
"learning_rate": 0.0005,
|
41981 |
+
"loss": 1.2287,
|
41982 |
+
"step": 5996
|
41983 |
+
},
|
41984 |
+
{
|
41985 |
+
"epoch": 4.9276910435497125,
|
41986 |
+
"grad_norm": 0.5709188580513,
|
41987 |
+
"learning_rate": 0.0005,
|
41988 |
+
"loss": 1.414,
|
41989 |
+
"step": 5997
|
41990 |
+
},
|
41991 |
+
{
|
41992 |
+
"epoch": 4.928512736236647,
|
41993 |
+
"grad_norm": 0.5473321080207825,
|
41994 |
+
"learning_rate": 0.0005,
|
41995 |
+
"loss": 1.3689,
|
41996 |
+
"step": 5998
|
41997 |
+
},
|
41998 |
+
{
|
41999 |
+
"epoch": 4.929334428923583,
|
42000 |
+
"grad_norm": 0.5459017157554626,
|
42001 |
+
"learning_rate": 0.0005,
|
42002 |
+
"loss": 1.2876,
|
42003 |
+
"step": 5999
|
42004 |
+
},
|
42005 |
+
{
|
42006 |
+
"epoch": 4.930156121610517,
|
42007 |
+
"grad_norm": 0.5736708641052246,
|
42008 |
+
"learning_rate": 0.0005,
|
42009 |
+
"loss": 1.3623,
|
42010 |
+
"step": 6000
|
42011 |
}
|
42012 |
],
|
42013 |
"logging_steps": 1,
|
|
|
42022 |
"should_evaluate": false,
|
42023 |
"should_log": false,
|
42024 |
"should_save": true,
|
42025 |
+
"should_training_stop": true
|
42026 |
},
|
42027 |
"attributes": {}
|
42028 |
}
|
42029 |
},
|
42030 |
+
"total_flos": 1.2481056037404672e+18,
|
42031 |
"train_batch_size": 8,
|
42032 |
"trial_name": null,
|
42033 |
"trial_params": null
|
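With "should_training_stop" flipping to true at step 6000, this looks like the final checkpoint of the run, most likely because a configured step limit was reached. A small sketch for inspecting the saved state, assuming a local clone of the repo and the per-step log_history entries shown in the hunk above:

```python
import json

# Inspect the updated trainer state (path assumes a local clone of this repo).
with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

print(state["global_step"], state["epoch"])  # 6000 4.930156121610517

# Mean training loss over the 100 steps added in this commit (5901..6000).
losses = [e["loss"] for e in state["log_history"]
          if "loss" in e and 5901 <= e.get("step", 0) <= 6000]
print(len(losses), sum(losses) / len(losses))  # 100 entries, mean ~1.33
```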