lapp0 committed on
Commit
e8d285f
1 Parent(s): e7ad966

Training in progress, step 61875

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. config.json +39 -0
  2. logs/attn_layer_mapper=all, attn_loss_fn=cos, attn_projector=orthogonal/events.out.tfevents.1724813671.d9703cfffc0c +3 -0
  3. logs/attn_layer_mapper=all, attn_loss_fn=cos, attn_projector=orthogonal/events.out.tfevents.1724814951.d9703cfffc0c +3 -0
  4. logs/attn_layer_mapper=all, attn_loss_fn=cos, attn_projector=orthogonal/events.out.tfevents.1724815454.d9703cfffc0c +3 -0
  5. logs/attn_layer_mapper=all, attn_loss_fn=cos, attn_projector=orthogonal/events.out.tfevents.1724815924.d9703cfffc0c +3 -0
  6. logs/attn_layer_mapper=all, attn_loss_fn=cos, attn_projector=orthogonal/events.out.tfevents.1724816923.d9703cfffc0c +3 -0
  7. logs/attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724812288.d9703cfffc0c +3 -0
  8. logs/attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724813291.d9703cfffc0c +3 -0
  9. logs/attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724814328.d9703cfffc0c +3 -0
  10. logs/attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724814686.d9703cfffc0c +3 -0
  11. logs/attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724815189.d9703cfffc0c +3 -0
  12. logs/attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724815657.d9703cfffc0c +3 -0
  13. logs/attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724816091.d9703cfffc0c +3 -0
  14. logs/attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724816319.d9703cfffc0c +3 -0
  15. logs/attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724816651.d9703cfffc0c +3 -0
  16. logs/attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724817783.d9703cfffc0c +3 -0
  17. logs/attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724818071.d9703cfffc0c +3 -0
  18. logs/attn_layer_mapper=last, attn_loss_fn=cos, attn_projector=orthogonal/events.out.tfevents.1724813806.d9703cfffc0c +3 -0
  19. logs/attn_layer_mapper=last, attn_loss_fn=cos, attn_projector=orthogonal/events.out.tfevents.1724817059.d9703cfffc0c +3 -0
  20. logs/attn_layer_mapper=last, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724813533.d9703cfffc0c +3 -0
  21. logs/attn_layer_mapper=last, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724814463.d9703cfffc0c +3 -0
  22. logs/attn_layer_mapper=last, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724814821.d9703cfffc0c +3 -0
  23. logs/attn_layer_mapper=last, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724815323.d9703cfffc0c +3 -0
  24. logs/attn_layer_mapper=last, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724815791.d9703cfffc0c +3 -0
  25. logs/attn_layer_mapper=last, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724816453.d9703cfffc0c +3 -0
  26. logs/attn_layer_mapper=last, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724816788.d9703cfffc0c +3 -0
  27. logs/attn_layer_mapper=last_k_2, attn_loss_fn=cos, attn_projector=orthogonal/events.out.tfevents.1724813739.d9703cfffc0c +3 -0
  28. logs/attn_layer_mapper=last_k_2, attn_loss_fn=cos, attn_projector=orthogonal/events.out.tfevents.1724815016.d9703cfffc0c +3 -0
  29. logs/attn_layer_mapper=last_k_2, attn_loss_fn=cos, attn_projector=orthogonal/events.out.tfevents.1724815521.d9703cfffc0c +3 -0
  30. logs/attn_layer_mapper=last_k_2, attn_loss_fn=cos, attn_projector=orthogonal/events.out.tfevents.1724816991.d9703cfffc0c +3 -0
  31. logs/attn_layer_mapper=last_k_2, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724813466.d9703cfffc0c +3 -0
  32. logs/attn_layer_mapper=last_k_2, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724814395.d9703cfffc0c +3 -0
  33. logs/attn_layer_mapper=last_k_2, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724814754.d9703cfffc0c +3 -0
  34. logs/attn_layer_mapper=last_k_2, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724815256.d9703cfffc0c +3 -0
  35. logs/attn_layer_mapper=last_k_2, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724815725.d9703cfffc0c +3 -0
  36. logs/attn_layer_mapper=last_k_2, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724816160.d9703cfffc0c +3 -0
  37. logs/attn_layer_mapper=last_k_2, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724816387.d9703cfffc0c +3 -0
  38. logs/attn_layer_mapper=last_k_2, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724816721.d9703cfffc0c +3 -0
  39. logs/attn_layer_mapper=layer-2, attn_loss_fn=cos, attn_projector=orthogonal/events.out.tfevents.1724813874.d9703cfffc0c +3 -0
  40. logs/attn_layer_mapper=layer-2, attn_loss_fn=cos, attn_projector=orthogonal/events.out.tfevents.1724817125.d9703cfffc0c +3 -0
  41. logs/attn_layer_mapper=layer-2, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724813602.d9703cfffc0c +3 -0
  42. logs/attn_layer_mapper=layer-2, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724814529.d9703cfffc0c +3 -0
  43. logs/attn_layer_mapper=layer-2, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724814886.d9703cfffc0c +3 -0
  44. logs/attn_layer_mapper=layer-2, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724815389.d9703cfffc0c +3 -0
  45. logs/attn_layer_mapper=layer-2, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724815858.d9703cfffc0c +3 -0
  46. logs/attn_layer_mapper=layer-2, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724816855.d9703cfffc0c +3 -0
  47. merges.txt +0 -0
  48. model.safetensors +3 -0
  49. special_tokens_map.json +6 -0
  50. tokenizer.json +0 -0
config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "gpt2",
3
+ "activation_function": "gelu_new",
4
+ "architectures": [
5
+ "GPT2LMHeadModel"
6
+ ],
7
+ "attn_pdrop": 0.1,
8
+ "bos_token_id": 50256,
9
+ "embd_pdrop": 0.1,
10
+ "eos_token_id": 50256,
11
+ "initializer_range": 0.02,
12
+ "layer_norm_epsilon": 1e-05,
13
+ "model_type": "gpt2",
14
+ "n_ctx": 1024,
15
+ "n_embd": 768,
16
+ "n_head": 12,
17
+ "n_inner": null,
18
+ "n_layer": 12,
19
+ "n_positions": 1024,
20
+ "reorder_and_upcast_attn": false,
21
+ "resid_pdrop": 0.1,
22
+ "scale_attn_by_inverse_layer_idx": false,
23
+ "scale_attn_weights": true,
24
+ "summary_activation": null,
25
+ "summary_first_dropout": 0.1,
26
+ "summary_proj_to_labels": true,
27
+ "summary_type": "cls_index",
28
+ "summary_use_proj": true,
29
+ "task_specific_params": {
30
+ "text-generation": {
31
+ "do_sample": true,
32
+ "max_length": 50
33
+ }
34
+ },
35
+ "torch_dtype": "bfloat16",
36
+ "transformers_version": "4.44.2",
37
+ "use_cache": true,
38
+ "vocab_size": 50257
39
+ }
logs/attn_layer_mapper=all, attn_loss_fn=cos, attn_projector=orthogonal/events.out.tfevents.1724813671.d9703cfffc0c ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50e555d6008e24fc8760436eaa8dbe5507768b3e65e4bd27b1f05b360a8b20d1
3
+ size 5563
logs/attn_layer_mapper=all, attn_loss_fn=cos, attn_projector=orthogonal/events.out.tfevents.1724814951.d9703cfffc0c ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eba9aa519ebc21f459a8278f6d2742392f42fe5996912e8e5d4fc9918212e9b9
3
+ size 5563
logs/attn_layer_mapper=all, attn_loss_fn=cos, attn_projector=orthogonal/events.out.tfevents.1724815454.d9703cfffc0c ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c7c07c941393371936b2d6c534dc0f72115edb8a7e631ff4f29d5431934f713
3
+ size 5563
logs/attn_layer_mapper=all, attn_loss_fn=cos, attn_projector=orthogonal/events.out.tfevents.1724815924.d9703cfffc0c ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7dcb9086e1bb0afc5415c29f499f3ffd83bcaf957061f53870312ad7c1c39b66
3
+ size 5366
logs/attn_layer_mapper=all, attn_loss_fn=cos, attn_projector=orthogonal/events.out.tfevents.1724816923.d9703cfffc0c ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7030d8e043a98b6a4b3a795408e5acd4c5be361c8057f390ab53211f0714dd3
3
+ size 5563
logs/attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724812288.d9703cfffc0c ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf176dba7f8017d2d8b96927cf9c1cdc54d72340bf08811af5c50243780eae67
3
+ size 2079109
logs/attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724813291.d9703cfffc0c ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41f4458874e525923a08b560ea1bbc9fa3a64d05a1aa13e2dfe882873531072a
3
+ size 5571
logs/attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724814328.d9703cfffc0c ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5d503414037df529afcbf26f093ab519ae2102d0581a1fa8788316c0353bdb5
3
+ size 5571
logs/attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724814686.d9703cfffc0c ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:032ef1fafbb6ad5996b866d36b808f2b3b36a6b42bf5182504522b04d5908fcb
3
+ size 5571
logs/attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724815189.d9703cfffc0c ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c28f410a00d90af22ba64134971cafe3e8d22948b2b93a0913e27f6cea0b4f08
3
+ size 5571
logs/attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724815657.d9703cfffc0c ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6eb8115df2abecd47c416b2a1a299566564a219386cc1ce9410cf7298f8823c1
3
+ size 5571
logs/attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724816091.d9703cfffc0c ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1d03a2cbe54c1bfc8689dab18a3a3d91ad9bc44fa5ddf46589e850c450d9dc1
3
+ size 5571
logs/attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724816319.d9703cfffc0c ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:340cb24e72c9ae1c6f252982c0452631eacbd8832987cb7e31196a4efc6748bc
3
+ size 5571
logs/attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724816651.d9703cfffc0c ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55359cb9857efd480a464b20d7afae740e07837f47505c7195eb437cf733d864
3
+ size 5571
logs/attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724817783.d9703cfffc0c ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca8a18fdc6b59d1825af35b8562fdd5d45d7bc844be3e3dc03e22cf1caf0131c
3
+ size 167981
logs/attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724818071.d9703cfffc0c ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2edf6fc77473b3105ecad169b74f6536553c72ae2561a8b9ce4b1a6d2af9b0d0
3
+ size 29625293
logs/attn_layer_mapper=last, attn_loss_fn=cos, attn_projector=orthogonal/events.out.tfevents.1724813806.d9703cfffc0c ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:323ddc0f2136ca6aebe52a516778bef4d9c827aa199cf7d1dc4a0e2c8197bbf1
3
+ size 5565
logs/attn_layer_mapper=last, attn_loss_fn=cos, attn_projector=orthogonal/events.out.tfevents.1724817059.d9703cfffc0c ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c067f17aa8effb9c87482b136ace71922e98e6d080ea0c578d58d49598c6853a
3
+ size 5565
logs/attn_layer_mapper=last, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724813533.d9703cfffc0c ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1045998582b94de0ede9d772f62934323a5a70cd648b9d542ab35016284eec9
3
+ size 5573
logs/attn_layer_mapper=last, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724814463.d9703cfffc0c ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ea1f534bbcc5ebed77dab1c60ce5eff9509a1526d6c605af2a812167fc7d07c
3
+ size 5573
logs/attn_layer_mapper=last, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724814821.d9703cfffc0c ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a9cf0ff62f82e8d3be9a32ed18ed68fb3b7fdbbdd8c6a616912af497aee57cc
3
+ size 5573
logs/attn_layer_mapper=last, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724815323.d9703cfffc0c ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf238e6e33ef731631e70b8bae753ab39a4f58fbe2172da127ed369b34898297
3
+ size 5573
logs/attn_layer_mapper=last, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724815791.d9703cfffc0c ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0248e4005969b1b76d44940fa72756dfdcd8bbb955ca331e333b321e391e2e68
3
+ size 5573
logs/attn_layer_mapper=last, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724816453.d9703cfffc0c ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:478e73e45a95269a1ff12a5ff2ec54936521d8074426fef57906bbfe6d8a6d97
3
+ size 5573
logs/attn_layer_mapper=last, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724816788.d9703cfffc0c ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1c6b97b735df21881b7d17e28b938379640bbc58a51bfabcab8cbb1cd5a0a7f
3
+ size 5573
logs/attn_layer_mapper=last_k_2, attn_loss_fn=cos, attn_projector=orthogonal/events.out.tfevents.1724813739.d9703cfffc0c ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84b25b878cf520cb8e3a5eac6eac60be82198c6efb638946efa741613d60c0b8
3
+ size 5573
logs/attn_layer_mapper=last_k_2, attn_loss_fn=cos, attn_projector=orthogonal/events.out.tfevents.1724815016.d9703cfffc0c ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ef2485b9bb157ce89aa5d97e2d7a21c30c7c69abf95ede99cb4bb2eb5a2b474
3
+ size 5573
logs/attn_layer_mapper=last_k_2, attn_loss_fn=cos, attn_projector=orthogonal/events.out.tfevents.1724815521.d9703cfffc0c ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a1eb192de97eca5beeea093f1324aed2ddc62cec6bc80c8acef633a4f791572
3
+ size 5376
logs/attn_layer_mapper=last_k_2, attn_loss_fn=cos, attn_projector=orthogonal/events.out.tfevents.1724816991.d9703cfffc0c ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d864a0816de9acadd93fb2f1606109d90f77b53028d37380e01f8bca536f5c7
3
+ size 5573
logs/attn_layer_mapper=last_k_2, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724813466.d9703cfffc0c ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c8db3fdd3a6b5f66d849ebe1e4607b9add884f08d6641e2e1bb602f270c02f7
3
+ size 5581
logs/attn_layer_mapper=last_k_2, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724814395.d9703cfffc0c ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3fcfa1ec713693e00cc74937854f1517651f88d29895d9ca8b8ced7b7a4bd221
3
+ size 5581
logs/attn_layer_mapper=last_k_2, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724814754.d9703cfffc0c ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a605b740e8797d1b238b387144da0ecc71b3b91658132d872fa19d0586cd0376
3
+ size 5581
logs/attn_layer_mapper=last_k_2, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724815256.d9703cfffc0c ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb4aacac21548690032c9c2fafe24f2ab90ebd5060d6e335eefff680f7b830ed
3
+ size 5581
logs/attn_layer_mapper=last_k_2, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724815725.d9703cfffc0c ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b8d21c1435458228b45aa58735d08161a5586a05b08e997d3b495a75e29bb70
3
+ size 5581
logs/attn_layer_mapper=last_k_2, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724816160.d9703cfffc0c ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:893263a6350c715c7b615a7d67051a60e91a33b01a6f05080cfd6249072cba7a
3
+ size 5384
logs/attn_layer_mapper=last_k_2, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724816387.d9703cfffc0c ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:024f704036134ed8644da23fdc6aa7966fdd799c6870f30cca8a7ab5e3625db3
3
+ size 5581
logs/attn_layer_mapper=last_k_2, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724816721.d9703cfffc0c ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be34c679f6aaf5eb002d8a84d792469169dbf0c20b266064b4fba3ec409f8f4e
3
+ size 5581
logs/attn_layer_mapper=layer-2, attn_loss_fn=cos, attn_projector=orthogonal/events.out.tfevents.1724813874.d9703cfffc0c ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d0437135821486a9c4bf2a2306c83c3f772b1de98689c365275e5e92980572b
3
+ size 5571
logs/attn_layer_mapper=layer-2, attn_loss_fn=cos, attn_projector=orthogonal/events.out.tfevents.1724817125.d9703cfffc0c ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4833f365a0c4a5abe55d747c90144377a49c9cee4ebd77a195d8e60e309b9a8
3
+ size 5571
logs/attn_layer_mapper=layer-2, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724813602.d9703cfffc0c ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93054fd38036a0a0f90999b7989e02de39509ce24239424405a46ed75a62230e
3
+ size 5579
logs/attn_layer_mapper=layer-2, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724814529.d9703cfffc0c ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54c18e5de5018e864c8b44dd02412d0739fe91b89cf290a466aabfafa12f173f
3
+ size 5382
logs/attn_layer_mapper=layer-2, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724814886.d9703cfffc0c ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d9aca457d5deaff2f58a16e4974066816616a59bacc71f6aa448983d7a10759
3
+ size 5579
logs/attn_layer_mapper=layer-2, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724815389.d9703cfffc0c ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4521e08107df3c4b57ddfef8941908ff446198a8e0821d32d9744111101619e
3
+ size 5579
logs/attn_layer_mapper=layer-2, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724815858.d9703cfffc0c ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:096a84f6801148a8bb65af522d412cd67f3669bfaccb99b51e9c83bb8ad93eb4
3
+ size 5579
logs/attn_layer_mapper=layer-2, attn_loss_fn=raw_mse, attn_projector=orthogonal/events.out.tfevents.1724816855.d9703cfffc0c ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6bb8755813af91b3259c0a5c1d7ef6a39110d413fff81c81de15585d518343a
3
+ size 5579
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d57c93d8c3a31a21c287c79276f463f7d57a8c6a60b007050d7c95bce0dcdf1c
3
+ size 248894656
special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<|endoftext|>",
3
+ "eos_token": "<|endoftext|>",
4
+ "pad_token": "<|endoftext|>",
5
+ "unk_token": "<|endoftext|>"
6
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff