Training in progress, step 61875
Browse files- benchmarks.shelve.bak +0 -0
- benchmarks.shelve.dat +0 -0
- benchmarks.shelve.dir +0 -0
- config.json +39 -0
- logs/attn_layer_mapper=all, attn_loss_fn=cos, attn_projector=linear/events.out.tfevents.1724523661.e3f806ea38c9 +3 -0
- logs/attn_layer_mapper=all, attn_loss_fn=cos, attn_projector=linear/events.out.tfevents.1724524283.e3f806ea38c9 +3 -0
- logs/attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724522510.e3f806ea38c9 +3 -0
- logs/attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724523463.e3f806ea38c9 +3 -0
- logs/attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724523824.e3f806ea38c9 +3 -0
- logs/attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724524084.e3f806ea38c9 +3 -0
- logs/attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724524694.e3f806ea38c9 +3 -0
- logs/attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724524956.e3f806ea38c9 +3 -0
- logs/attn_layer_mapper=last, attn_loss_fn=cos, attn_projector=linear/events.out.tfevents.1724524382.e3f806ea38c9 +3 -0
- logs/attn_layer_mapper=last, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724523563.e3f806ea38c9 +3 -0
- logs/attn_layer_mapper=last, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724523923.e3f806ea38c9 +3 -0
- logs/attn_layer_mapper=last, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724524182.e3f806ea38c9 +3 -0
- logs/attn_layer_mapper=last, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724524793.e3f806ea38c9 +3 -0
- logs/attn_layer_mapper=last_k_2, attn_loss_fn=cos, attn_projector=linear/events.out.tfevents.1724523711.e3f806ea38c9 +3 -0
- logs/attn_layer_mapper=last_k_2, attn_loss_fn=cos, attn_projector=linear/events.out.tfevents.1724524333.e3f806ea38c9 +3 -0
- logs/attn_layer_mapper=last_k_2, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724523513.e3f806ea38c9 +3 -0
- logs/attn_layer_mapper=last_k_2, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724523874.e3f806ea38c9 +3 -0
- logs/attn_layer_mapper=last_k_2, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724524133.e3f806ea38c9 +3 -0
- logs/attn_layer_mapper=last_k_2, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724524744.e3f806ea38c9 +3 -0
- logs/attn_layer_mapper=last_k_2, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724531592.e3f806ea38c9 +3 -0
- logs/attn_layer_mapper=layer-2, attn_loss_fn=cos, attn_projector=linear/events.out.tfevents.1724524431.e3f806ea38c9 +3 -0
- logs/attn_layer_mapper=layer-2, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724523613.e3f806ea38c9 +3 -0
- logs/attn_layer_mapper=layer-2, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724523982.e3f806ea38c9 +3 -0
- logs/attn_layer_mapper=layer-2, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724524232.e3f806ea38c9 +3 -0
- logs/attn_layer_mapper=layer-2, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724524842.e3f806ea38c9 +3 -0
- merges.txt +0 -0
- model.safetensors +3 -0
- special_tokens_map.json +6 -0
- tokenizer.json +0 -0
- tokenizer_config.json +20 -0
- training_args.bin +3 -0
- vocab.json +0 -0
benchmarks.shelve.bak
ADDED
File without changes
|
benchmarks.shelve.dat
ADDED
File without changes
|
benchmarks.shelve.dir
ADDED
File without changes
|
config.json
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "gpt2",
|
3 |
+
"activation_function": "gelu_new",
|
4 |
+
"architectures": [
|
5 |
+
"GPT2LMHeadModel"
|
6 |
+
],
|
7 |
+
"attn_pdrop": 0.1,
|
8 |
+
"bos_token_id": 50256,
|
9 |
+
"embd_pdrop": 0.1,
|
10 |
+
"eos_token_id": 50256,
|
11 |
+
"initializer_range": 0.02,
|
12 |
+
"layer_norm_epsilon": 1e-05,
|
13 |
+
"model_type": "gpt2",
|
14 |
+
"n_ctx": 1024,
|
15 |
+
"n_embd": 768,
|
16 |
+
"n_head": 12,
|
17 |
+
"n_inner": null,
|
18 |
+
"n_layer": 12,
|
19 |
+
"n_positions": 1024,
|
20 |
+
"reorder_and_upcast_attn": false,
|
21 |
+
"resid_pdrop": 0.1,
|
22 |
+
"scale_attn_by_inverse_layer_idx": false,
|
23 |
+
"scale_attn_weights": true,
|
24 |
+
"summary_activation": null,
|
25 |
+
"summary_first_dropout": 0.1,
|
26 |
+
"summary_proj_to_labels": true,
|
27 |
+
"summary_type": "cls_index",
|
28 |
+
"summary_use_proj": true,
|
29 |
+
"task_specific_params": {
|
30 |
+
"text-generation": {
|
31 |
+
"do_sample": true,
|
32 |
+
"max_length": 50
|
33 |
+
}
|
34 |
+
},
|
35 |
+
"torch_dtype": "bfloat16",
|
36 |
+
"transformers_version": "4.44.1",
|
37 |
+
"use_cache": true,
|
38 |
+
"vocab_size": 50257
|
39 |
+
}
|
logs/attn_layer_mapper=all, attn_loss_fn=cos, attn_projector=linear/events.out.tfevents.1724523661.e3f806ea38c9
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:577957b37ba54e360acf46062bc17809facd866a910252c9b8bc2bb40afda439
|
3 |
+
size 5558
|
logs/attn_layer_mapper=all, attn_loss_fn=cos, attn_projector=linear/events.out.tfevents.1724524283.e3f806ea38c9
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ac4ad0345c302d748f83758efa2a88603d03ef3dd7f40c2450e93e8637d185a1
|
3 |
+
size 5558
|
logs/attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724522510.e3f806ea38c9
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:77b25551da8952eb9a18fbb47288cbacb98f92f410617812604aecfca45ce56a
|
3 |
+
size 5566
|
logs/attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724523463.e3f806ea38c9
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:548f2acdba18142803a364d454f8beb451c2598b7e69a02e20db82a26ae06ab1
|
3 |
+
size 5566
|
logs/attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724523824.e3f806ea38c9
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0ec061abfd76b50529d767b36d9afc0c87ff9c6a28df21296ff29784a6d62805
|
3 |
+
size 5566
|
logs/attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724524084.e3f806ea38c9
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f5c12de8f459f93d9a8484d78b3c5075e88ba9404d1307f1f2dd9484b43483c0
|
3 |
+
size 5566
|
logs/attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724524694.e3f806ea38c9
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f732b9dee7a8b5edd257cc063aed97680bcb4a755da519fe8757f3a1b51dc21d
|
3 |
+
size 5566
|
logs/attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724524956.e3f806ea38c9
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d180c5a2f2ada5e6286cf35996d62290b9823e1dfb6e4f66d6a0d4916a3cfdfa
|
3 |
+
size 29625288
|
logs/attn_layer_mapper=last, attn_loss_fn=cos, attn_projector=linear/events.out.tfevents.1724524382.e3f806ea38c9
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0d4a889bb1c0d888ed9985115d54e8c36e8f2d1971ae33de6f1dd8a159404c40
|
3 |
+
size 5560
|
logs/attn_layer_mapper=last, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724523563.e3f806ea38c9
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ae0bbffacfd69fbbd3c0d644bb795074e3986162a2e770a7c57d92f21ddd6d53
|
3 |
+
size 5568
|
logs/attn_layer_mapper=last, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724523923.e3f806ea38c9
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eea059fd0eedaabdd623cb335f68407ee64a1b2c3bdc1318c1774e50a0e00ad6
|
3 |
+
size 5568
|
logs/attn_layer_mapper=last, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724524182.e3f806ea38c9
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:325a1c84c196630995b6570e781194683ab06ddb79fca1c4607de4ae7ef4dae4
|
3 |
+
size 5568
|
logs/attn_layer_mapper=last, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724524793.e3f806ea38c9
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fda0dbb3a9e5852a4e194f580d6d484cc35ea6e1244c52a198999e59f8672d5a
|
3 |
+
size 5568
|
logs/attn_layer_mapper=last_k_2, attn_loss_fn=cos, attn_projector=linear/events.out.tfevents.1724523711.e3f806ea38c9
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c3d63977c854e15443ea58b621ff070c55b98a4e60d9bd12b1de565e42dadf53
|
3 |
+
size 5568
|
logs/attn_layer_mapper=last_k_2, attn_loss_fn=cos, attn_projector=linear/events.out.tfevents.1724524333.e3f806ea38c9
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d559755c0c20982c22f2d5a163a4e62f2f2d8602b90c0604f5531bec5d41cd8e
|
3 |
+
size 5568
|
logs/attn_layer_mapper=last_k_2, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724523513.e3f806ea38c9
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c582375ca7920dd4d88a1c9b9b629a849ecfaa52af57399904c6e1e5a7a80cda
|
3 |
+
size 5576
|
logs/attn_layer_mapper=last_k_2, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724523874.e3f806ea38c9
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:17d283c55297b7269344beac70983a9162ceb36537a7b1247568a19bc5f8b4d0
|
3 |
+
size 5576
|
logs/attn_layer_mapper=last_k_2, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724524133.e3f806ea38c9
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:df324b7eec7478095f73c19424a7a67e1e47f07bb9a4efd2e7b328929c9a26f9
|
3 |
+
size 5576
|
logs/attn_layer_mapper=last_k_2, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724524744.e3f806ea38c9
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:41e9eec68a3dd610c34a297c4d2f49d09f1d5392f0f5c8a4fb28f9b91c113b27
|
3 |
+
size 5576
|
logs/attn_layer_mapper=last_k_2, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724531592.e3f806ea38c9
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:79878130fd48bddf8fca5df905effe213a05ee4c029b1ca7764cc41569024544
|
3 |
+
size 29625298
|
logs/attn_layer_mapper=layer-2, attn_loss_fn=cos, attn_projector=linear/events.out.tfevents.1724524431.e3f806ea38c9
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eb30a32d9c344a26697a4469aec6f93929ab80ecd548d85f208edcc5dc952cb8
|
3 |
+
size 5566
|
logs/attn_layer_mapper=layer-2, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724523613.e3f806ea38c9
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a1c44e294d72eecf84afbac478d19f206d2543cd5647ce14d75c4e24a335b569
|
3 |
+
size 5574
|
logs/attn_layer_mapper=layer-2, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724523982.e3f806ea38c9
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d7325cf3ad32ce767d1629ad09f470cb2561ace132be67f734c92e7eb7f867a2
|
3 |
+
size 5377
|
logs/attn_layer_mapper=layer-2, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724524232.e3f806ea38c9
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:846f766c3117e34d12513c3c2dbd3b489244a2c3f073d54153da7ad2e6b05c4f
|
3 |
+
size 5574
|
logs/attn_layer_mapper=layer-2, attn_loss_fn=raw_mse, attn_projector=linear/events.out.tfevents.1724524842.e3f806ea38c9
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:355529149c2c153e8bed4dab7839ffb44fc92ea08ef5bd322c6b6b2424a0a8a4
|
3 |
+
size 5377
|
merges.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8fc7114b0258073c1910f5a664d2b0331fad70ba71908a4bd2a369b18b9c665f
|
3 |
+
size 248894656
|
special_tokens_map.json
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": "<|endoftext|>",
|
3 |
+
"eos_token": "<|endoftext|>",
|
4 |
+
"pad_token": "<|endoftext|>",
|
5 |
+
"unk_token": "<|endoftext|>"
|
6 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"add_prefix_space": false,
|
3 |
+
"added_tokens_decoder": {
|
4 |
+
"50256": {
|
5 |
+
"content": "<|endoftext|>",
|
6 |
+
"lstrip": false,
|
7 |
+
"normalized": true,
|
8 |
+
"rstrip": false,
|
9 |
+
"single_word": false,
|
10 |
+
"special": true
|
11 |
+
}
|
12 |
+
},
|
13 |
+
"bos_token": "<|endoftext|>",
|
14 |
+
"clean_up_tokenization_spaces": true,
|
15 |
+
"eos_token": "<|endoftext|>",
|
16 |
+
"model_max_length": 1024,
|
17 |
+
"pad_token": "<|endoftext|>",
|
18 |
+
"tokenizer_class": "GPT2Tokenizer",
|
19 |
+
"unk_token": "<|endoftext|>"
|
20 |
+
}
|
training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:04a9f747b2b38e7a5b6965c3502c5eb4f5c466e974724b9ebc48fc9e0ca8afe4
|
3 |
+
size 5432
|
vocab.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|