Training in progress, step 61875
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- config.json +39 -0
- logs/attn_layer_mapper=all, attn_loss_fn=cos, attn_projector=orthogonal/events.out.tfevents.1724813671.d9703cfffc0c +3 -0
- logs/attn_layer_mapper=all, attn_loss_fn=cos, attn_projector=orthogonal/events.out.tfevents.1724814951.d9703cfffc0c +3 -0
- logs/attn_layer_mapper=all, attn_loss_fn=cos, attn_projector=orthogonal/events.out.tfevents.1724815454.d9703cfffc0c +3 -0
- logs/attn_layer_mapper=all, attn_loss_fn=cos, attn_projector=orthogonal/events.out.tfevents.1724815924.d9703cfffc0c +3 -0
- logs/attn_layer_mapper=all, attn_loss_fn=cos, attn_projector=orthogonal/events.out.tfevents.1724816923.d9703cfffc0c +3 -0
- logs/attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724812288.d9703cfffc0c +3 -0
- logs/attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724813291.d9703cfffc0c +3 -0
- logs/attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724814328.d9703cfffc0c +3 -0
- logs/attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724814686.d9703cfffc0c +3 -0
- logs/attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724815189.d9703cfffc0c +3 -0
- logs/attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724815657.d9703cfffc0c +3 -0
- logs/attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724816091.d9703cfffc0c +3 -0
- logs/attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724816319.d9703cfffc0c +3 -0
- logs/attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724816651.d9703cfffc0c +3 -0
- logs/attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724817783.d9703cfffc0c +3 -0
- logs/attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724818071.d9703cfffc0c +3 -0
- logs/attn_layer_mapper=last, attn_loss_fn=cos, attn_projector=orthogonal/events.out.tfevents.1724813806.d9703cfffc0c +3 -0
- logs/attn_layer_mapper=last, attn_loss_fn=cos, attn_projector=orthogonal/events.out.tfevents.1724817059.d9703cfffc0c +3 -0
- logs/attn_layer_mapper=last, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724813533.d9703cfffc0c +3 -0
- logs/attn_layer_mapper=last, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724814463.d9703cfffc0c +3 -0
- logs/attn_layer_mapper=last, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724814821.d9703cfffc0c +3 -0
- logs/attn_layer_mapper=last, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724815323.d9703cfffc0c +3 -0
- logs/attn_layer_mapper=last, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724815791.d9703cfffc0c +3 -0
- logs/attn_layer_mapper=last, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724816453.d9703cfffc0c +3 -0
- logs/attn_layer_mapper=last, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724816788.d9703cfffc0c +3 -0
- logs/attn_layer_mapper=last_k_2, attn_loss_fn=cos, attn_projector=orthogonal/events.out.tfevents.1724813739.d9703cfffc0c +3 -0
- logs/attn_layer_mapper=last_k_2, attn_loss_fn=cos, attn_projector=orthogonal/events.out.tfevents.1724815016.d9703cfffc0c +3 -0
- logs/attn_layer_mapper=last_k_2, attn_loss_fn=cos, attn_projector=orthogonal/events.out.tfevents.1724815521.d9703cfffc0c +3 -0
- logs/attn_layer_mapper=last_k_2, attn_loss_fn=cos, attn_projector=orthogonal/events.out.tfevents.1724816991.d9703cfffc0c +3 -0
- logs/attn_layer_mapper=last_k_2, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724813466.d9703cfffc0c +3 -0
- logs/attn_layer_mapper=last_k_2, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724814395.d9703cfffc0c +3 -0
- logs/attn_layer_mapper=last_k_2, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724814754.d9703cfffc0c +3 -0
- logs/attn_layer_mapper=last_k_2, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724815256.d9703cfffc0c +3 -0
- logs/attn_layer_mapper=last_k_2, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724815725.d9703cfffc0c +3 -0
- logs/attn_layer_mapper=last_k_2, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724816160.d9703cfffc0c +3 -0
- logs/attn_layer_mapper=last_k_2, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724816387.d9703cfffc0c +3 -0
- logs/attn_layer_mapper=last_k_2, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724816721.d9703cfffc0c +3 -0
- logs/attn_layer_mapper=layer-2, attn_loss_fn=cos, attn_projector=orthogonal/events.out.tfevents.1724813874.d9703cfffc0c +3 -0
- logs/attn_layer_mapper=layer-2, attn_loss_fn=cos, attn_projector=orthogonal/events.out.tfevents.1724817125.d9703cfffc0c +3 -0
- logs/attn_layer_mapper=layer-2, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724813602.d9703cfffc0c +3 -0
- logs/attn_layer_mapper=layer-2, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724814529.d9703cfffc0c +3 -0
- logs/attn_layer_mapper=layer-2, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724814886.d9703cfffc0c +3 -0
- logs/attn_layer_mapper=layer-2, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724815389.d9703cfffc0c +3 -0
- logs/attn_layer_mapper=layer-2, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724815858.d9703cfffc0c +3 -0
- logs/attn_layer_mapper=layer-2, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724816855.d9703cfffc0c +3 -0
- merges.txt +0 -0
- model.safetensors +3 -0
- special_tokens_map.json +6 -0
- tokenizer.json +0 -0
config.json
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "gpt2",
|
3 |
+
"activation_function": "gelu_new",
|
4 |
+
"architectures": [
|
5 |
+
"GPT2LMHeadModel"
|
6 |
+
],
|
7 |
+
"attn_pdrop": 0.1,
|
8 |
+
"bos_token_id": 50256,
|
9 |
+
"embd_pdrop": 0.1,
|
10 |
+
"eos_token_id": 50256,
|
11 |
+
"initializer_range": 0.02,
|
12 |
+
"layer_norm_epsilon": 1e-05,
|
13 |
+
"model_type": "gpt2",
|
14 |
+
"n_ctx": 1024,
|
15 |
+
"n_embd": 768,
|
16 |
+
"n_head": 12,
|
17 |
+
"n_inner": null,
|
18 |
+
"n_layer": 12,
|
19 |
+
"n_positions": 1024,
|
20 |
+
"reorder_and_upcast_attn": false,
|
21 |
+
"resid_pdrop": 0.1,
|
22 |
+
"scale_attn_by_inverse_layer_idx": false,
|
23 |
+
"scale_attn_weights": true,
|
24 |
+
"summary_activation": null,
|
25 |
+
"summary_first_dropout": 0.1,
|
26 |
+
"summary_proj_to_labels": true,
|
27 |
+
"summary_type": "cls_index",
|
28 |
+
"summary_use_proj": true,
|
29 |
+
"task_specific_params": {
|
30 |
+
"text-generation": {
|
31 |
+
"do_sample": true,
|
32 |
+
"max_length": 50
|
33 |
+
}
|
34 |
+
},
|
35 |
+
"torch_dtype": "bfloat16",
|
36 |
+
"transformers_version": "4.44.2",
|
37 |
+
"use_cache": true,
|
38 |
+
"vocab_size": 50257
|
39 |
+
}
|
logs/attn_layer_mapper=all, attn_loss_fn=cos, attn_projector=orthogonal/events.out.tfevents.1724813671.d9703cfffc0c
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:50e555d6008e24fc8760436eaa8dbe5507768b3e65e4bd27b1f05b360a8b20d1
|
3 |
+
size 5563
|
logs/attn_layer_mapper=all, attn_loss_fn=cos, attn_projector=orthogonal/events.out.tfevents.1724814951.d9703cfffc0c
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eba9aa519ebc21f459a8278f6d2742392f42fe5996912e8e5d4fc9918212e9b9
|
3 |
+
size 5563
|
logs/attn_layer_mapper=all, attn_loss_fn=cos, attn_projector=orthogonal/events.out.tfevents.1724815454.d9703cfffc0c
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2c7c07c941393371936b2d6c534dc0f72115edb8a7e631ff4f29d5431934f713
|
3 |
+
size 5563
|
logs/attn_layer_mapper=all, attn_loss_fn=cos, attn_projector=orthogonal/events.out.tfevents.1724815924.d9703cfffc0c
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7dcb9086e1bb0afc5415c29f499f3ffd83bcaf957061f53870312ad7c1c39b66
|
3 |
+
size 5366
|
logs/attn_layer_mapper=all, attn_loss_fn=cos, attn_projector=orthogonal/events.out.tfevents.1724816923.d9703cfffc0c
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a7030d8e043a98b6a4b3a795408e5acd4c5be361c8057f390ab53211f0714dd3
|
3 |
+
size 5563
|
logs/attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724812288.d9703cfffc0c
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cf176dba7f8017d2d8b96927cf9c1cdc54d72340bf08811af5c50243780eae67
|
3 |
+
size 2079109
|
logs/attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724813291.d9703cfffc0c
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:41f4458874e525923a08b560ea1bbc9fa3a64d05a1aa13e2dfe882873531072a
|
3 |
+
size 5571
|
logs/attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724814328.d9703cfffc0c
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b5d503414037df529afcbf26f093ab519ae2102d0581a1fa8788316c0353bdb5
|
3 |
+
size 5571
|
logs/attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724814686.d9703cfffc0c
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:032ef1fafbb6ad5996b866d36b808f2b3b36a6b42bf5182504522b04d5908fcb
|
3 |
+
size 5571
|
logs/attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724815189.d9703cfffc0c
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c28f410a00d90af22ba64134971cafe3e8d22948b2b93a0913e27f6cea0b4f08
|
3 |
+
size 5571
|
logs/attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724815657.d9703cfffc0c
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6eb8115df2abecd47c416b2a1a299566564a219386cc1ce9410cf7298f8823c1
|
3 |
+
size 5571
|
logs/attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724816091.d9703cfffc0c
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f1d03a2cbe54c1bfc8689dab18a3a3d91ad9bc44fa5ddf46589e850c450d9dc1
|
3 |
+
size 5571
|
logs/attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724816319.d9703cfffc0c
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:340cb24e72c9ae1c6f252982c0452631eacbd8832987cb7e31196a4efc6748bc
|
3 |
+
size 5571
|
logs/attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724816651.d9703cfffc0c
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:55359cb9857efd480a464b20d7afae740e07837f47505c7195eb437cf733d864
|
3 |
+
size 5571
|
logs/attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724817783.d9703cfffc0c
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ca8a18fdc6b59d1825af35b8562fdd5d45d7bc844be3e3dc03e22cf1caf0131c
|
3 |
+
size 167981
|
logs/attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724818071.d9703cfffc0c
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2edf6fc77473b3105ecad169b74f6536553c72ae2561a8b9ce4b1a6d2af9b0d0
|
3 |
+
size 29625293
|
logs/attn_layer_mapper=last, attn_loss_fn=cos, attn_projector=orthogonal/events.out.tfevents.1724813806.d9703cfffc0c
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:323ddc0f2136ca6aebe52a516778bef4d9c827aa199cf7d1dc4a0e2c8197bbf1
|
3 |
+
size 5565
|
logs/attn_layer_mapper=last, attn_loss_fn=cos, attn_projector=orthogonal/events.out.tfevents.1724817059.d9703cfffc0c
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c067f17aa8effb9c87482b136ace71922e98e6d080ea0c578d58d49598c6853a
|
3 |
+
size 5565
|
logs/attn_layer_mapper=last, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724813533.d9703cfffc0c
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b1045998582b94de0ede9d772f62934323a5a70cd648b9d542ab35016284eec9
|
3 |
+
size 5573
|
logs/attn_layer_mapper=last, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724814463.d9703cfffc0c
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1ea1f534bbcc5ebed77dab1c60ce5eff9509a1526d6c605af2a812167fc7d07c
|
3 |
+
size 5573
|
logs/attn_layer_mapper=last, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724814821.d9703cfffc0c
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9a9cf0ff62f82e8d3be9a32ed18ed68fb3b7fdbbdd8c6a616912af497aee57cc
|
3 |
+
size 5573
|
logs/attn_layer_mapper=last, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724815323.d9703cfffc0c
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cf238e6e33ef731631e70b8bae753ab39a4f58fbe2172da127ed369b34898297
|
3 |
+
size 5573
|
logs/attn_layer_mapper=last, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724815791.d9703cfffc0c
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0248e4005969b1b76d44940fa72756dfdcd8bbb955ca331e333b321e391e2e68
|
3 |
+
size 5573
|
logs/attn_layer_mapper=last, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724816453.d9703cfffc0c
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:478e73e45a95269a1ff12a5ff2ec54936521d8074426fef57906bbfe6d8a6d97
|
3 |
+
size 5573
|
logs/attn_layer_mapper=last, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724816788.d9703cfffc0c
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e1c6b97b735df21881b7d17e28b938379640bbc58a51bfabcab8cbb1cd5a0a7f
|
3 |
+
size 5573
|
logs/attn_layer_mapper=last_k_2, attn_loss_fn=cos, attn_projector=orthogonal/events.out.tfevents.1724813739.d9703cfffc0c
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:84b25b878cf520cb8e3a5eac6eac60be82198c6efb638946efa741613d60c0b8
|
3 |
+
size 5573
|
logs/attn_layer_mapper=last_k_2, attn_loss_fn=cos, attn_projector=orthogonal/events.out.tfevents.1724815016.d9703cfffc0c
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5ef2485b9bb157ce89aa5d97e2d7a21c30c7c69abf95ede99cb4bb2eb5a2b474
|
3 |
+
size 5573
|
logs/attn_layer_mapper=last_k_2, attn_loss_fn=cos, attn_projector=orthogonal/events.out.tfevents.1724815521.d9703cfffc0c
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1a1eb192de97eca5beeea093f1324aed2ddc62cec6bc80c8acef633a4f791572
|
3 |
+
size 5376
|
logs/attn_layer_mapper=last_k_2, attn_loss_fn=cos, attn_projector=orthogonal/events.out.tfevents.1724816991.d9703cfffc0c
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0d864a0816de9acadd93fb2f1606109d90f77b53028d37380e01f8bca536f5c7
|
3 |
+
size 5573
|
logs/attn_layer_mapper=last_k_2, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724813466.d9703cfffc0c
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4c8db3fdd3a6b5f66d849ebe1e4607b9add884f08d6641e2e1bb602f270c02f7
|
3 |
+
size 5581
|
logs/attn_layer_mapper=last_k_2, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724814395.d9703cfffc0c
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3fcfa1ec713693e00cc74937854f1517651f88d29895d9ca8b8ced7b7a4bd221
|
3 |
+
size 5581
|
logs/attn_layer_mapper=last_k_2, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724814754.d9703cfffc0c
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a605b740e8797d1b238b387144da0ecc71b3b91658132d872fa19d0586cd0376
|
3 |
+
size 5581
|
logs/attn_layer_mapper=last_k_2, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724815256.d9703cfffc0c
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eb4aacac21548690032c9c2fafe24f2ab90ebd5060d6e335eefff680f7b830ed
|
3 |
+
size 5581
|
logs/attn_layer_mapper=last_k_2, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724815725.d9703cfffc0c
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1b8d21c1435458228b45aa58735d08161a5586a05b08e997d3b495a75e29bb70
|
3 |
+
size 5581
|
logs/attn_layer_mapper=last_k_2, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724816160.d9703cfffc0c
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:893263a6350c715c7b615a7d67051a60e91a33b01a6f05080cfd6249072cba7a
|
3 |
+
size 5384
|
logs/attn_layer_mapper=last_k_2, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724816387.d9703cfffc0c
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:024f704036134ed8644da23fdc6aa7966fdd799c6870f30cca8a7ab5e3625db3
|
3 |
+
size 5581
|
logs/attn_layer_mapper=last_k_2, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724816721.d9703cfffc0c
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:be34c679f6aaf5eb002d8a84d792469169dbf0c20b266064b4fba3ec409f8f4e
|
3 |
+
size 5581
|
logs/attn_layer_mapper=layer-2, attn_loss_fn=cos, attn_projector=orthogonal/events.out.tfevents.1724813874.d9703cfffc0c
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6d0437135821486a9c4bf2a2306c83c3f772b1de98689c365275e5e92980572b
|
3 |
+
size 5571
|
logs/attn_layer_mapper=layer-2, attn_loss_fn=cos, attn_projector=orthogonal/events.out.tfevents.1724817125.d9703cfffc0c
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c4833f365a0c4a5abe55d747c90144377a49c9cee4ebd77a195d8e60e309b9a8
|
3 |
+
size 5571
|
logs/attn_layer_mapper=layer-2, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724813602.d9703cfffc0c
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:93054fd38036a0a0f90999b7989e02de39509ce24239424405a46ed75a62230e
|
3 |
+
size 5579
|
logs/attn_layer_mapper=layer-2, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724814529.d9703cfffc0c
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:54c18e5de5018e864c8b44dd02412d0739fe91b89cf290a466aabfafa12f173f
|
3 |
+
size 5382
|
logs/attn_layer_mapper=layer-2, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724814886.d9703cfffc0c
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9d9aca457d5deaff2f58a16e4974066816616a59bacc71f6aa448983d7a10759
|
3 |
+
size 5579
|
logs/attn_layer_mapper=layer-2, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724815389.d9703cfffc0c
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d4521e08107df3c4b57ddfef8941908ff446198a8e0821d32d9744111101619e
|
3 |
+
size 5579
|
logs/attn_layer_mapper=layer-2, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724815858.d9703cfffc0c
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:096a84f6801148a8bb65af522d412cd67f3669bfaccb99b51e9c83bb8ad93eb4
|
3 |
+
size 5579
|
logs/attn_layer_mapper=layer-2, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724816855.d9703cfffc0c
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d6bb8755813af91b3259c0a5c1d7ef6a39110d413fff81c81de15585d518343a
|
3 |
+
size 5579
|
merges.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d57c93d8c3a31a21c287c79276f463f7d57a8c6a60b007050d7c95bce0dcdf1c
|
3 |
+
size 248894656
|
special_tokens_map.json
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": "<|endoftext|>",
|
3 |
+
"eos_token": "<|endoftext|>",
|
4 |
+
"pad_token": "<|endoftext|>",
|
5 |
+
"unk_token": "<|endoftext|>"
|
6 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|