ikerion committed
Commit 891758a · verified · 1 Parent(s): 43b3e0d

Training in progress, step 3000, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c2c2d7f612913d93a077fdb78b3b8d2e485d7a3d9b8edf61f4b3878a29927581
+oid sha256:c5a6fe2d4b3225fd2a69dde200b394c96d6cf4eca02bc329d2419c8c1466f332
 size 335605144
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c42dc892637ebac98711496849d8c72d68e7b4aac93a8d4107267a47139ac80a
+oid sha256:f22d4a12b719d135438b26a2f413193a9867d93caceb92688f5f079f11cfd624
 size 671467026
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:150188ffb5cc1748a7977a1870ef1e2a294b0577c956793d6fdde54fa8cc8bf0
+oid sha256:b4bb882287d6976c062c089a7686086b15f0601112a612821d81ac9776ae1a44
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ac658b35d124fb9bc77fb9f0ca508648718ba76b0e5a26a1e7feca7ebaaaca02
+oid sha256:cb5106a3d40df1efd273b9eb04f3299504d470352eb1b64d92b1e010e88c8900
 size 1064
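
All four files above are Git LFS pointers, so each diff only swaps the object hash (oid) while the byte size stays the same; the checkpoint tensors themselves live in LFS storage. As a minimal sketch (assuming a local clone of this repo with the LFS objects pulled; the helper names are hypothetical), a downloaded file can be checked against its pointer like this:

import hashlib
from pathlib import Path

def parse_lfs_pointer(pointer_text: str) -> dict:
    # A pointer has three lines: "version ...", "oid sha256:<hex>", "size <bytes>".
    fields = {}
    for line in pointer_text.strip().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields

def matches_pointer(local_file: Path, pointer_text: str) -> bool:
    # Stream the file so large blobs (e.g. the 671 MB optimizer.pt) are not held in memory.
    fields = parse_lfs_pointer(pointer_text)
    expected_oid = fields["oid"].split(":", 1)[1]
    expected_size = int(fields["size"])
    digest, size = hashlib.sha256(), 0
    with local_file.open("rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
            size += len(chunk)
    return digest.hexdigest() == expected_oid and size == expected_size

# Example: after this commit, last-checkpoint/adapter_model.safetensors should hash to
# c5a6fe2d... with size 335605144.
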
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.011984162032604218,
-  "best_model_checkpoint": "./mistral-magyar-portas-results/checkpoint-2000",
-  "epoch": 1.567398119122257,
+  "best_metric": 0.011125968769192696,
+  "best_model_checkpoint": "./mistral-magyar-portas-results/checkpoint-2500",
+  "epoch": 2.3510971786833856,
   "eval_steps": 500,
-  "global_step": 2000,
+  "global_step": 3000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -319,6 +319,162 @@
       "eval_samples_per_second": 2.586,
       "eval_steps_per_second": 1.294,
       "step": 2000
+    },
+    {
+      "epoch": 1.6065830721003134,
+      "grad_norm": 0.09814453125,
+      "learning_rate": 9.275910610392104e-05,
+      "loss": 0.0091,
+      "step": 2050
+    },
+    {
+      "epoch": 1.64576802507837,
+      "grad_norm": 0.13671875,
+      "learning_rate": 8.856432702880984e-05,
+      "loss": 0.0096,
+      "step": 2100
+    },
+    {
+      "epoch": 1.6849529780564263,
+      "grad_norm": 0.07568359375,
+      "learning_rate": 8.43898474574128e-05,
+      "loss": 0.0092,
+      "step": 2150
+    },
+    {
+      "epoch": 1.7241379310344827,
+      "grad_norm": 0.046875,
+      "learning_rate": 8.02430775232462e-05,
+      "loss": 0.0084,
+      "step": 2200
+    },
+    {
+      "epoch": 1.7633228840125392,
+      "grad_norm": 0.062255859375,
+      "learning_rate": 7.61313781723508e-05,
+      "loss": 0.0087,
+      "step": 2250
+    },
+    {
+      "epoch": 1.8025078369905956,
+      "grad_norm": 0.06591796875,
+      "learning_rate": 7.206204809685029e-05,
+      "loss": 0.0086,
+      "step": 2300
+    },
+    {
+      "epoch": 1.841692789968652,
+      "grad_norm": 0.052978515625,
+      "learning_rate": 6.804231077901733e-05,
+      "loss": 0.0085,
+      "step": 2350
+    },
+    {
+      "epoch": 1.8808777429467085,
+      "grad_norm": 0.07421875,
+      "learning_rate": 6.407930166884409e-05,
+      "loss": 0.0094,
+      "step": 2400
+    },
+    {
+      "epoch": 1.9200626959247649,
+      "grad_norm": 0.1044921875,
+      "learning_rate": 6.018005551787984e-05,
+      "loss": 0.008,
+      "step": 2450
+    },
+    {
+      "epoch": 1.9592476489028212,
+      "grad_norm": 0.05615234375,
+      "learning_rate": 5.635149389181855e-05,
+      "loss": 0.0088,
+      "step": 2500
+    },
+    {
+      "epoch": 1.9592476489028212,
+      "eval_loss": 0.011125968769192696,
+      "eval_runtime": 438.9494,
+      "eval_samples_per_second": 2.586,
+      "eval_steps_per_second": 1.294,
+      "step": 2500
+    },
+    {
+      "epoch": 1.9984326018808778,
+      "grad_norm": 0.038330078125,
+      "learning_rate": 5.260041288400284e-05,
+      "loss": 0.0092,
+      "step": 2550
+    },
+    {
+      "epoch": 2.0376175548589344,
+      "grad_norm": 0.056396484375,
+      "learning_rate": 4.893347105165468e-05,
+      "loss": 0.0064,
+      "step": 2600
+    },
+    {
+      "epoch": 2.0768025078369905,
+      "grad_norm": 0.0517578125,
+      "learning_rate": 4.535717759624677e-05,
+      "loss": 0.0065,
+      "step": 2650
+    },
+    {
+      "epoch": 2.115987460815047,
+      "grad_norm": 0.049560546875,
+      "learning_rate": 4.187788080899591e-05,
+      "loss": 0.0064,
+      "step": 2700
+    },
+    {
+      "epoch": 2.1551724137931036,
+      "grad_norm": 0.049072265625,
+      "learning_rate": 3.8501756801988675e-05,
+      "loss": 0.0062,
+      "step": 2750
+    },
+    {
+      "epoch": 2.19435736677116,
+      "grad_norm": 0.0908203125,
+      "learning_rate": 3.5234798544942914e-05,
+      "loss": 0.0063,
+      "step": 2800
+    },
+    {
+      "epoch": 2.2335423197492164,
+      "grad_norm": 0.0595703125,
+      "learning_rate": 3.208280522706602e-05,
+      "loss": 0.0066,
+      "step": 2850
+    },
+    {
+      "epoch": 2.2727272727272725,
+      "grad_norm": 0.044677734375,
+      "learning_rate": 2.9051371962893358e-05,
+      "loss": 0.0063,
+      "step": 2900
+    },
+    {
+      "epoch": 2.311912225705329,
+      "grad_norm": 0.05224609375,
+      "learning_rate": 2.6145879860380773e-05,
+      "loss": 0.0065,
+      "step": 2950
+    },
+    {
+      "epoch": 2.3510971786833856,
+      "grad_norm": 0.0908203125,
+      "learning_rate": 2.337148646888061e-05,
+      "loss": 0.0065,
+      "step": 3000
+    },
+    {
+      "epoch": 2.3510971786833856,
+      "eval_loss": 0.01185206975787878,
+      "eval_runtime": 438.7429,
+      "eval_samples_per_second": 2.587,
+      "eval_steps_per_second": 1.295,
+      "step": 3000
     }
   ],
   "logging_steps": 50,
@@ -326,7 +482,7 @@
   "num_input_tokens_seen": 0,
   "num_train_epochs": 3,
   "save_steps": 500,
-  "total_flos": 2.2359343890432e+18,
+  "total_flos": 3.3539015835648e+18,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null