AlekseyKorshuk
commited on
Commit
•
273af26
1
Parent(s):
86ac4b4
huggingartists
Browse files- README.md +3 -3
- evaluation.txt +1 -1
- flax_model.msgpack +1 -1
- optimizer.pt +1 -1
- pytorch_model.bin +1 -1
- rng_state.pth +2 -2
- scheduler.pt +1 -1
- trainer_state.json +113 -7
- training_args.bin +1 -1
README.md
CHANGED
@@ -45,15 +45,15 @@ from datasets import load_dataset
|
|
45 |
dataset = load_dataset("huggingartists/coldplay")
|
46 |
```
|
47 |
|
48 |
-
[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/
|
49 |
|
50 |
## Training procedure
|
51 |
|
52 |
The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Coldplay's lyrics.
|
53 |
|
54 |
-
Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/
|
55 |
|
56 |
-
At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/
|
57 |
|
58 |
## How to use
|
59 |
|
|
|
45 |
dataset = load_dataset("huggingartists/coldplay")
|
46 |
```
|
47 |
|
48 |
+
[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/1gzc0ns4/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
|
49 |
|
50 |
## Training procedure
|
51 |
|
52 |
The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Coldplay's lyrics.
|
53 |
|
54 |
+
Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/22vgjn8r) for full transparency and reproducibility.
|
55 |
|
56 |
+
At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/22vgjn8r/artifacts) is logged and versioned.
|
57 |
|
58 |
## How to use
|
59 |
|
evaluation.txt
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"eval_loss":
|
|
|
1 |
+
{"eval_loss": 1.5336766242980957, "eval_runtime": 2.7958, "eval_samples_per_second": 21.461, "eval_steps_per_second": 2.861, "epoch": 9.0}
|
flax_model.msgpack
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 497764120
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d06678e316f0244aae1a5d5def2e8ab05ebd44699021c19e283ec3d9b3eb7019
|
3 |
size 497764120
|
optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 995604017
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1f2f3a899d9271926fe05e74d2e1e78a5a284bbd2a99b96fa49ccf0da030a8c8
|
3 |
size 995604017
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 510403817
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:74f16ce366fea146ff0b814bbb0ee1c52aa946330b16cade31e7d9eb5550b66a
|
3 |
size 510403817
|
rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:88fb4b11febc18ef05627a88f778f7e276b66959c10157ee7e09d9fd08d92c2e
|
3 |
+
size 14567
|
scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8b5e135d5638eeea038dbe56656468198b1ccb302b58ca9cca115146d8a6edf2
|
3 |
size 623
|
trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
-
"best_metric":
|
3 |
-
"best_model_checkpoint": "output/coldplay/checkpoint-
|
4 |
-
"epoch":
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -520,11 +520,117 @@
|
|
520 |
"eval_samples_per_second": 75.98,
|
521 |
"eval_steps_per_second": 10.318,
|
522 |
"step": 376
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
523 |
}
|
524 |
],
|
525 |
-
"max_steps":
|
526 |
-
"num_train_epochs":
|
527 |
-
"total_flos":
|
528 |
"trial_name": null,
|
529 |
"trial_params": null
|
530 |
}
|
|
|
1 |
{
|
2 |
+
"best_metric": 1.5336766242980957,
|
3 |
+
"best_model_checkpoint": "output/coldplay/checkpoint-450",
|
4 |
+
"epoch": 9.0,
|
5 |
+
"global_step": 450,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
520 |
"eval_samples_per_second": 75.98,
|
521 |
"eval_steps_per_second": 10.318,
|
522 |
"step": 376
|
523 |
+
},
|
524 |
+
{
|
525 |
+
"epoch": 7.6,
|
526 |
+
"learning_rate": 8.979856581412133e-05,
|
527 |
+
"loss": 1.3418,
|
528 |
+
"step": 380
|
529 |
+
},
|
530 |
+
{
|
531 |
+
"epoch": 7.7,
|
532 |
+
"learning_rate": 0.0001089220683072637,
|
533 |
+
"loss": 1.3258,
|
534 |
+
"step": 385
|
535 |
+
},
|
536 |
+
{
|
537 |
+
"epoch": 7.8,
|
538 |
+
"learning_rate": 0.00012409856581412136,
|
539 |
+
"loss": 1.67,
|
540 |
+
"step": 390
|
541 |
+
},
|
542 |
+
{
|
543 |
+
"epoch": 7.9,
|
544 |
+
"learning_rate": 0.00013384247701784757,
|
545 |
+
"loss": 1.6416,
|
546 |
+
"step": 395
|
547 |
+
},
|
548 |
+
{
|
549 |
+
"epoch": 8.0,
|
550 |
+
"learning_rate": 0.0001372,
|
551 |
+
"loss": 1.5041,
|
552 |
+
"step": 400
|
553 |
+
},
|
554 |
+
{
|
555 |
+
"epoch": 8.0,
|
556 |
+
"eval_loss": 1.550144910812378,
|
557 |
+
"eval_runtime": 2.6997,
|
558 |
+
"eval_samples_per_second": 22.225,
|
559 |
+
"eval_steps_per_second": 2.963,
|
560 |
+
"step": 400
|
561 |
+
},
|
562 |
+
{
|
563 |
+
"epoch": 8.1,
|
564 |
+
"learning_rate": 0.0001338424770178476,
|
565 |
+
"loss": 1.3775,
|
566 |
+
"step": 405
|
567 |
+
},
|
568 |
+
{
|
569 |
+
"epoch": 8.2,
|
570 |
+
"learning_rate": 0.00012409856581412158,
|
571 |
+
"loss": 1.3052,
|
572 |
+
"step": 410
|
573 |
+
},
|
574 |
+
{
|
575 |
+
"epoch": 8.3,
|
576 |
+
"learning_rate": 0.00010892206830726361,
|
577 |
+
"loss": 1.4469,
|
578 |
+
"step": 415
|
579 |
+
},
|
580 |
+
{
|
581 |
+
"epoch": 8.4,
|
582 |
+
"learning_rate": 8.979856581412145e-05,
|
583 |
+
"loss": 1.5571,
|
584 |
+
"step": 420
|
585 |
+
},
|
586 |
+
{
|
587 |
+
"epoch": 8.5,
|
588 |
+
"learning_rate": 6.859999999999995e-05,
|
589 |
+
"loss": 1.3572,
|
590 |
+
"step": 425
|
591 |
+
},
|
592 |
+
{
|
593 |
+
"epoch": 8.6,
|
594 |
+
"learning_rate": 4.740143418587868e-05,
|
595 |
+
"loss": 1.5739,
|
596 |
+
"step": 430
|
597 |
+
},
|
598 |
+
{
|
599 |
+
"epoch": 8.7,
|
600 |
+
"learning_rate": 2.8277931692736505e-05,
|
601 |
+
"loss": 1.4984,
|
602 |
+
"step": 435
|
603 |
+
},
|
604 |
+
{
|
605 |
+
"epoch": 8.8,
|
606 |
+
"learning_rate": 1.3101434185878506e-05,
|
607 |
+
"loss": 1.4842,
|
608 |
+
"step": 440
|
609 |
+
},
|
610 |
+
{
|
611 |
+
"epoch": 8.9,
|
612 |
+
"learning_rate": 3.3575229821524526e-06,
|
613 |
+
"loss": 1.3847,
|
614 |
+
"step": 445
|
615 |
+
},
|
616 |
+
{
|
617 |
+
"epoch": 9.0,
|
618 |
+
"learning_rate": 0.0,
|
619 |
+
"loss": 1.3742,
|
620 |
+
"step": 450
|
621 |
+
},
|
622 |
+
{
|
623 |
+
"epoch": 9.0,
|
624 |
+
"eval_loss": 1.5336766242980957,
|
625 |
+
"eval_runtime": 2.6931,
|
626 |
+
"eval_samples_per_second": 22.279,
|
627 |
+
"eval_steps_per_second": 2.971,
|
628 |
+
"step": 450
|
629 |
}
|
630 |
],
|
631 |
+
"max_steps": 450,
|
632 |
+
"num_train_epochs": 9,
|
633 |
+
"total_flos": 468235321344000.0,
|
634 |
"trial_name": null,
|
635 |
"trial_params": null
|
636 |
}
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2863
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b8b49270cf7d7730cd41580e35a9163dd952c0d60570cdecffcd61412186ef48
|
3 |
size 2863
|