Fix model names
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- attn_out/layer_0/trainer_0/config.json +1 -1
- attn_out/layer_1/trainer_0/config.json +1 -1
- attn_out/layer_2/trainer_0/config.json +1 -1
- attn_out/layer_3/trainer_0/config.json +1 -1
- attn_out/layer_4/trainer_0/config.json +1 -1
- attn_out/layer_5/trainer_0/config.json +1 -1
- attn_out/layer_6/trainer_0/config.json +1 -1
- attn_out/layer_7/trainer_0/config.json +1 -1
- mlp_out/layer_0/trainer_0/config.json +1 -1
- mlp_out/layer_1/trainer_0/config.json +1 -1
- mlp_out/layer_2/trainer_0/config.json +1 -1
- mlp_out/layer_3/trainer_0/config.json +1 -1
- mlp_out/layer_4/trainer_0/config.json +1 -1
- mlp_out/layer_5/trainer_0/config.json +1 -1
- mlp_out/layer_6/trainer_0/config.json +1 -1
- mlp_out/layer_7/trainer_0/config.json +1 -1
- resid_post/layer_0/trainer_10/config.json +1 -1
- resid_post/layer_0/trainer_11/config.json +1 -1
- resid_post/layer_0/trainer_12/config.json +1 -1
- resid_post/layer_0/trainer_3/config.json +1 -1
- resid_post/layer_0/trainer_4/config.json +1 -1
- resid_post/layer_0/trainer_5/config.json +1 -1
- resid_post/layer_1/trainer_10/config.json +1 -1
- resid_post/layer_1/trainer_11/config.json +1 -1
- resid_post/layer_1/trainer_12/config.json +1 -1
- resid_post/layer_1/trainer_13/config.json +1 -1
- resid_post/layer_1/trainer_14/config.json +1 -1
- resid_post/layer_1/trainer_3/config.json +1 -1
- resid_post/layer_1/trainer_4/config.json +1 -1
- resid_post/layer_1/trainer_5/config.json +1 -1
- resid_post/layer_1/trainer_6/config.json +1 -1
- resid_post/layer_2/trainer_10/config.json +1 -1
- resid_post/layer_2/trainer_11/config.json +1 -1
- resid_post/layer_2/trainer_12/config.json +1 -1
- resid_post/layer_2/trainer_13/config.json +1 -1
- resid_post/layer_2/trainer_14/config.json +1 -1
- resid_post/layer_2/trainer_3/config.json +1 -1
- resid_post/layer_2/trainer_4/config.json +1 -1
- resid_post/layer_2/trainer_5/config.json +1 -1
- resid_post/layer_2/trainer_6/config.json +1 -1
- resid_post/layer_3/trainer_10/config.json +1 -1
- resid_post/layer_3/trainer_11/config.json +1 -1
- resid_post/layer_3/trainer_12/config.json +1 -1
- resid_post/layer_3/trainer_13/config.json +1 -1
- resid_post/layer_3/trainer_14/config.json +1 -1
- resid_post/layer_3/trainer_3/config.json +1 -1
- resid_post/layer_3/trainer_4/config.json +1 -1
- resid_post/layer_3/trainer_5/config.json +1 -1
- resid_post/layer_3/trainer_6/config.json +1 -1
- resid_post/layer_4/trainer_10/config.json +1 -1
attn_out/layer_0/trainer_0/config.json
CHANGED
@@ -10,7 +10,7 @@
|
|
10 |
"k": 32,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 0,
|
13 |
-
"lm_name": "
|
14 |
"wandb_name": "TopK-layer0-ef8-k32-attn_out",
|
15 |
"submodule_name": null,
|
16 |
"submodule_type": "attn_out"
|
|
|
10 |
"k": 32,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 0,
|
13 |
+
"lm_name": "mntss/Othello-GPT",
|
14 |
"wandb_name": "TopK-layer0-ef8-k32-attn_out",
|
15 |
"submodule_name": null,
|
16 |
"submodule_type": "attn_out"
|
attn_out/layer_1/trainer_0/config.json
CHANGED
@@ -10,7 +10,7 @@
|
|
10 |
"k": 32,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 1,
|
13 |
-
"lm_name": "
|
14 |
"wandb_name": "TopK-layer1-ef8-k32-attn_out",
|
15 |
"submodule_name": null,
|
16 |
"submodule_type": "attn_out"
|
|
|
10 |
"k": 32,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 1,
|
13 |
+
"lm_name": "mntss/Othello-GPT",
|
14 |
"wandb_name": "TopK-layer1-ef8-k32-attn_out",
|
15 |
"submodule_name": null,
|
16 |
"submodule_type": "attn_out"
|
attn_out/layer_2/trainer_0/config.json
CHANGED
@@ -10,7 +10,7 @@
|
|
10 |
"k": 32,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 2,
|
13 |
-
"lm_name": "
|
14 |
"wandb_name": "TopK-layer2-ef8-k32-attn_out",
|
15 |
"submodule_name": null,
|
16 |
"submodule_type": "attn_out"
|
|
|
10 |
"k": 32,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 2,
|
13 |
+
"lm_name": "mntss/Othello-GPT",
|
14 |
"wandb_name": "TopK-layer2-ef8-k32-attn_out",
|
15 |
"submodule_name": null,
|
16 |
"submodule_type": "attn_out"
|
attn_out/layer_3/trainer_0/config.json
CHANGED
@@ -10,7 +10,7 @@
|
|
10 |
"k": 32,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 3,
|
13 |
-
"lm_name": "
|
14 |
"wandb_name": "TopK-layer3-ef8-k32-attn_out",
|
15 |
"submodule_name": null,
|
16 |
"submodule_type": "attn_out"
|
|
|
10 |
"k": 32,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 3,
|
13 |
+
"lm_name": "mntss/Othello-GPT",
|
14 |
"wandb_name": "TopK-layer3-ef8-k32-attn_out",
|
15 |
"submodule_name": null,
|
16 |
"submodule_type": "attn_out"
|
attn_out/layer_4/trainer_0/config.json
CHANGED
@@ -10,7 +10,7 @@
|
|
10 |
"k": 32,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 4,
|
13 |
-
"lm_name": "
|
14 |
"wandb_name": "TopK-layer4-ef8-k32-attn_out",
|
15 |
"submodule_name": null,
|
16 |
"submodule_type": "attn_out"
|
|
|
10 |
"k": 32,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 4,
|
13 |
+
"lm_name": "mntss/Othello-GPT",
|
14 |
"wandb_name": "TopK-layer4-ef8-k32-attn_out",
|
15 |
"submodule_name": null,
|
16 |
"submodule_type": "attn_out"
|
attn_out/layer_5/trainer_0/config.json
CHANGED
@@ -10,7 +10,7 @@
|
|
10 |
"k": 32,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 5,
|
13 |
-
"lm_name": "
|
14 |
"wandb_name": "TopK-layer5-ef8-k32-attn_out",
|
15 |
"submodule_name": null,
|
16 |
"submodule_type": "attn_out"
|
|
|
10 |
"k": 32,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 5,
|
13 |
+
"lm_name": "mntss/Othello-GPT",
|
14 |
"wandb_name": "TopK-layer5-ef8-k32-attn_out",
|
15 |
"submodule_name": null,
|
16 |
"submodule_type": "attn_out"
|
attn_out/layer_6/trainer_0/config.json
CHANGED
@@ -10,7 +10,7 @@
|
|
10 |
"k": 32,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 6,
|
13 |
-
"lm_name": "
|
14 |
"wandb_name": "TopK-layer6-ef8-k32-attn_out",
|
15 |
"submodule_name": null,
|
16 |
"submodule_type": "attn_out"
|
|
|
10 |
"k": 32,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 6,
|
13 |
+
"lm_name": "mntss/Othello-GPT",
|
14 |
"wandb_name": "TopK-layer6-ef8-k32-attn_out",
|
15 |
"submodule_name": null,
|
16 |
"submodule_type": "attn_out"
|
attn_out/layer_7/trainer_0/config.json
CHANGED
@@ -10,7 +10,7 @@
|
|
10 |
"k": 32,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 7,
|
13 |
-
"lm_name": "
|
14 |
"wandb_name": "TopK-layer7-ef8-k32-attn_out",
|
15 |
"submodule_name": null,
|
16 |
"submodule_type": "attn_out"
|
|
|
10 |
"k": 32,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 7,
|
13 |
+
"lm_name": "mntss/Othello-GPT",
|
14 |
"wandb_name": "TopK-layer7-ef8-k32-attn_out",
|
15 |
"submodule_name": null,
|
16 |
"submodule_type": "attn_out"
|
mlp_out/layer_0/trainer_0/config.json
CHANGED
@@ -10,7 +10,7 @@
|
|
10 |
"k": 32,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 0,
|
13 |
-
"lm_name": "
|
14 |
"wandb_name": "TopK-layer0-ef8-k32-mlp_out",
|
15 |
"submodule_name": null,
|
16 |
"submodule_type": "mlp_out"
|
|
|
10 |
"k": 32,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 0,
|
13 |
+
"lm_name": "mntss/Othello-GPT",
|
14 |
"wandb_name": "TopK-layer0-ef8-k32-mlp_out",
|
15 |
"submodule_name": null,
|
16 |
"submodule_type": "mlp_out"
|
mlp_out/layer_1/trainer_0/config.json
CHANGED
@@ -10,7 +10,7 @@
|
|
10 |
"k": 32,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 1,
|
13 |
-
"lm_name": "
|
14 |
"wandb_name": "TopK-layer1-ef8-k32-mlp_out",
|
15 |
"submodule_name": null,
|
16 |
"submodule_type": "mlp_out"
|
|
|
10 |
"k": 32,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 1,
|
13 |
+
"lm_name": "mntss/Othello-GPT",
|
14 |
"wandb_name": "TopK-layer1-ef8-k32-mlp_out",
|
15 |
"submodule_name": null,
|
16 |
"submodule_type": "mlp_out"
|
mlp_out/layer_2/trainer_0/config.json
CHANGED
@@ -10,7 +10,7 @@
|
|
10 |
"k": 32,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 2,
|
13 |
-
"lm_name": "
|
14 |
"wandb_name": "TopK-layer2-ef8-k32-mlp_out",
|
15 |
"submodule_name": null,
|
16 |
"submodule_type": "mlp_out"
|
|
|
10 |
"k": 32,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 2,
|
13 |
+
"lm_name": "mntss/Othello-GPT",
|
14 |
"wandb_name": "TopK-layer2-ef8-k32-mlp_out",
|
15 |
"submodule_name": null,
|
16 |
"submodule_type": "mlp_out"
|
mlp_out/layer_3/trainer_0/config.json
CHANGED
@@ -10,7 +10,7 @@
|
|
10 |
"k": 32,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 3,
|
13 |
-
"lm_name": "
|
14 |
"wandb_name": "TopK-layer3-ef8-k32-mlp_out",
|
15 |
"submodule_name": null,
|
16 |
"submodule_type": "mlp_out"
|
|
|
10 |
"k": 32,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 3,
|
13 |
+
"lm_name": "mntss/Othello-GPT",
|
14 |
"wandb_name": "TopK-layer3-ef8-k32-mlp_out",
|
15 |
"submodule_name": null,
|
16 |
"submodule_type": "mlp_out"
|
mlp_out/layer_4/trainer_0/config.json
CHANGED
@@ -10,7 +10,7 @@
|
|
10 |
"k": 32,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 4,
|
13 |
-
"lm_name": "
|
14 |
"wandb_name": "TopK-layer4-ef8-k32-mlp_out",
|
15 |
"submodule_name": null,
|
16 |
"submodule_type": "mlp_out"
|
|
|
10 |
"k": 32,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 4,
|
13 |
+
"lm_name": "mntss/Othello-GPT",
|
14 |
"wandb_name": "TopK-layer4-ef8-k32-mlp_out",
|
15 |
"submodule_name": null,
|
16 |
"submodule_type": "mlp_out"
|
mlp_out/layer_5/trainer_0/config.json
CHANGED
@@ -10,7 +10,7 @@
|
|
10 |
"k": 32,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 5,
|
13 |
-
"lm_name": "
|
14 |
"wandb_name": "TopK-layer5-ef8-k32-mlp_out",
|
15 |
"submodule_name": null,
|
16 |
"submodule_type": "mlp_out"
|
|
|
10 |
"k": 32,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 5,
|
13 |
+
"lm_name": "mntss/Othello-GPT",
|
14 |
"wandb_name": "TopK-layer5-ef8-k32-mlp_out",
|
15 |
"submodule_name": null,
|
16 |
"submodule_type": "mlp_out"
|
mlp_out/layer_6/trainer_0/config.json
CHANGED
@@ -10,7 +10,7 @@
|
|
10 |
"k": 32,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 6,
|
13 |
-
"lm_name": "
|
14 |
"wandb_name": "TopK-layer6-ef8-k32-mlp_out",
|
15 |
"submodule_name": null,
|
16 |
"submodule_type": "mlp_out"
|
|
|
10 |
"k": 32,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 6,
|
13 |
+
"lm_name": "mntss/Othello-GPT",
|
14 |
"wandb_name": "TopK-layer6-ef8-k32-mlp_out",
|
15 |
"submodule_name": null,
|
16 |
"submodule_type": "mlp_out"
|
mlp_out/layer_7/trainer_0/config.json
CHANGED
@@ -10,7 +10,7 @@
|
|
10 |
"k": 32,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 7,
|
13 |
-
"lm_name": "
|
14 |
"wandb_name": "TopK-layer7-ef8-k32-mlp_out",
|
15 |
"submodule_name": null,
|
16 |
"submodule_type": "mlp_out"
|
|
|
10 |
"k": 32,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 7,
|
13 |
+
"lm_name": "mntss/Othello-GPT",
|
14 |
"wandb_name": "TopK-layer7-ef8-k32-mlp_out",
|
15 |
"submodule_name": null,
|
16 |
"submodule_type": "mlp_out"
|
resid_post/layer_0/trainer_10/config.json
CHANGED
@@ -10,7 +10,7 @@
|
|
10 |
"k": 16,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 0,
|
13 |
-
"lm_name": "
|
14 |
"wandb_name": "TopK-layer0-ef16-k16",
|
15 |
"submodule_name": null
|
16 |
},
|
|
|
10 |
"k": 16,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 0,
|
13 |
+
"lm_name": "mntss/Othello-GPT",
|
14 |
"wandb_name": "TopK-layer0-ef16-k16",
|
15 |
"submodule_name": null
|
16 |
},
|
resid_post/layer_0/trainer_11/config.json
CHANGED
@@ -10,7 +10,7 @@
|
|
10 |
"k": 32,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 0,
|
13 |
-
"lm_name": "
|
14 |
"wandb_name": "TopK-layer0-ef16-k32",
|
15 |
"submodule_name": null
|
16 |
},
|
|
|
10 |
"k": 32,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 0,
|
13 |
+
"lm_name": "mntss/Othello-GPT",
|
14 |
"wandb_name": "TopK-layer0-ef16-k32",
|
15 |
"submodule_name": null
|
16 |
},
|
resid_post/layer_0/trainer_12/config.json
CHANGED
@@ -10,7 +10,7 @@
|
|
10 |
"k": 64,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 0,
|
13 |
-
"lm_name": "
|
14 |
"wandb_name": "TopK-layer0-ef16-k64",
|
15 |
"submodule_name": null
|
16 |
},
|
|
|
10 |
"k": 64,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 0,
|
13 |
+
"lm_name": "mntss/Othello-GPT",
|
14 |
"wandb_name": "TopK-layer0-ef16-k64",
|
15 |
"submodule_name": null
|
16 |
},
|
resid_post/layer_0/trainer_3/config.json
CHANGED
@@ -10,7 +10,7 @@
|
|
10 |
"k": 16,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 0,
|
13 |
-
"lm_name": "
|
14 |
"wandb_name": "TopK-layer0-ef8-k16",
|
15 |
"submodule_name": null
|
16 |
},
|
|
|
10 |
"k": 16,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 0,
|
13 |
+
"lm_name": "mntss/Othello-GPT",
|
14 |
"wandb_name": "TopK-layer0-ef8-k16",
|
15 |
"submodule_name": null
|
16 |
},
|
resid_post/layer_0/trainer_4/config.json
CHANGED
@@ -10,7 +10,7 @@
|
|
10 |
"k": 32,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 0,
|
13 |
-
"lm_name": "
|
14 |
"wandb_name": "TopK-layer0-ef8-k32",
|
15 |
"submodule_name": null
|
16 |
},
|
|
|
10 |
"k": 32,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 0,
|
13 |
+
"lm_name": "mntss/Othello-GPT",
|
14 |
"wandb_name": "TopK-layer0-ef8-k32",
|
15 |
"submodule_name": null
|
16 |
},
|
resid_post/layer_0/trainer_5/config.json
CHANGED
@@ -10,7 +10,7 @@
|
|
10 |
"k": 64,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 0,
|
13 |
-
"lm_name": "
|
14 |
"wandb_name": "TopK-layer0-ef8-k64",
|
15 |
"submodule_name": null
|
16 |
},
|
|
|
10 |
"k": 64,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 0,
|
13 |
+
"lm_name": "mntss/Othello-GPT",
|
14 |
"wandb_name": "TopK-layer0-ef8-k64",
|
15 |
"submodule_name": null
|
16 |
},
|
resid_post/layer_1/trainer_10/config.json
CHANGED
@@ -10,7 +10,7 @@
|
|
10 |
"k": 16,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 1,
|
13 |
-
"lm_name": "
|
14 |
"wandb_name": "TopK-layer1-ef16-k16",
|
15 |
"submodule_name": null
|
16 |
},
|
|
|
10 |
"k": 16,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 1,
|
13 |
+
"lm_name": "mntss/Othello-GPT",
|
14 |
"wandb_name": "TopK-layer1-ef16-k16",
|
15 |
"submodule_name": null
|
16 |
},
|
resid_post/layer_1/trainer_11/config.json
CHANGED
@@ -10,7 +10,7 @@
|
|
10 |
"k": 32,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 1,
|
13 |
-
"lm_name": "
|
14 |
"wandb_name": "TopK-layer1-ef16-k32",
|
15 |
"submodule_name": null
|
16 |
},
|
|
|
10 |
"k": 32,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 1,
|
13 |
+
"lm_name": "mntss/Othello-GPT",
|
14 |
"wandb_name": "TopK-layer1-ef16-k32",
|
15 |
"submodule_name": null
|
16 |
},
|
resid_post/layer_1/trainer_12/config.json
CHANGED
@@ -10,7 +10,7 @@
|
|
10 |
"k": 64,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 1,
|
13 |
-
"lm_name": "
|
14 |
"wandb_name": "TopK-layer1-ef16-k64",
|
15 |
"submodule_name": null
|
16 |
},
|
|
|
10 |
"k": 64,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 1,
|
13 |
+
"lm_name": "mntss/Othello-GPT",
|
14 |
"wandb_name": "TopK-layer1-ef16-k64",
|
15 |
"submodule_name": null
|
16 |
},
|
resid_post/layer_1/trainer_13/config.json
CHANGED
@@ -10,7 +10,7 @@
|
|
10 |
"k": 128,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 1,
|
13 |
-
"lm_name": "
|
14 |
"wandb_name": "TopK-layer1-ef16-k128-resid_post",
|
15 |
"submodule_name": null,
|
16 |
"submodule_type": "resid_post"
|
|
|
10 |
"k": 128,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 1,
|
13 |
+
"lm_name": "mntss/Othello-GPT",
|
14 |
"wandb_name": "TopK-layer1-ef16-k128-resid_post",
|
15 |
"submodule_name": null,
|
16 |
"submodule_type": "resid_post"
|
resid_post/layer_1/trainer_14/config.json
CHANGED
@@ -10,7 +10,7 @@
|
|
10 |
"k": 128,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 1,
|
13 |
-
"lm_name": "
|
14 |
"wandb_name": "TopK-layer1-ef32-k128-resid_post",
|
15 |
"submodule_name": null,
|
16 |
"submodule_type": "resid_post"
|
|
|
10 |
"k": 128,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 1,
|
13 |
+
"lm_name": "mntss/Othello-GPT",
|
14 |
"wandb_name": "TopK-layer1-ef32-k128-resid_post",
|
15 |
"submodule_name": null,
|
16 |
"submodule_type": "resid_post"
|
resid_post/layer_1/trainer_3/config.json
CHANGED
@@ -10,7 +10,7 @@
|
|
10 |
"k": 16,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 1,
|
13 |
-
"lm_name": "
|
14 |
"wandb_name": "TopK-layer1-ef8-k16",
|
15 |
"submodule_name": null
|
16 |
},
|
|
|
10 |
"k": 16,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 1,
|
13 |
+
"lm_name": "mntss/Othello-GPT",
|
14 |
"wandb_name": "TopK-layer1-ef8-k16",
|
15 |
"submodule_name": null
|
16 |
},
|
resid_post/layer_1/trainer_4/config.json
CHANGED
@@ -10,7 +10,7 @@
|
|
10 |
"k": 32,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 1,
|
13 |
-
"lm_name": "
|
14 |
"wandb_name": "TopK-layer1-ef8-k32",
|
15 |
"submodule_name": null
|
16 |
},
|
|
|
10 |
"k": 32,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 1,
|
13 |
+
"lm_name": "mntss/Othello-GPT",
|
14 |
"wandb_name": "TopK-layer1-ef8-k32",
|
15 |
"submodule_name": null
|
16 |
},
|
resid_post/layer_1/trainer_5/config.json
CHANGED
@@ -10,7 +10,7 @@
|
|
10 |
"k": 64,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 1,
|
13 |
-
"lm_name": "
|
14 |
"wandb_name": "TopK-layer1-ef8-k64",
|
15 |
"submodule_name": null
|
16 |
},
|
|
|
10 |
"k": 64,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 1,
|
13 |
+
"lm_name": "mntss/Othello-GPT",
|
14 |
"wandb_name": "TopK-layer1-ef8-k64",
|
15 |
"submodule_name": null
|
16 |
},
|
resid_post/layer_1/trainer_6/config.json
CHANGED
@@ -10,7 +10,7 @@
|
|
10 |
"k": 128,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 1,
|
13 |
-
"lm_name": "
|
14 |
"wandb_name": "TopK-layer1-ef8-k128-resid_post",
|
15 |
"submodule_name": null,
|
16 |
"submodule_type": "resid_post"
|
|
|
10 |
"k": 128,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 1,
|
13 |
+
"lm_name": "mntss/Othello-GPT",
|
14 |
"wandb_name": "TopK-layer1-ef8-k128-resid_post",
|
15 |
"submodule_name": null,
|
16 |
"submodule_type": "resid_post"
|
resid_post/layer_2/trainer_10/config.json
CHANGED
@@ -10,7 +10,7 @@
|
|
10 |
"k": 16,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 2,
|
13 |
-
"lm_name": "
|
14 |
"wandb_name": "TopK-layer2-ef16-k16",
|
15 |
"submodule_name": null
|
16 |
},
|
|
|
10 |
"k": 16,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 2,
|
13 |
+
"lm_name": "mntss/Othello-GPT",
|
14 |
"wandb_name": "TopK-layer2-ef16-k16",
|
15 |
"submodule_name": null
|
16 |
},
|
resid_post/layer_2/trainer_11/config.json
CHANGED
@@ -10,7 +10,7 @@
|
|
10 |
"k": 32,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 2,
|
13 |
-
"lm_name": "
|
14 |
"wandb_name": "TopK-layer2-ef16-k32",
|
15 |
"submodule_name": null
|
16 |
},
|
|
|
10 |
"k": 32,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 2,
|
13 |
+
"lm_name": "mntss/Othello-GPT",
|
14 |
"wandb_name": "TopK-layer2-ef16-k32",
|
15 |
"submodule_name": null
|
16 |
},
|
resid_post/layer_2/trainer_12/config.json
CHANGED
@@ -10,7 +10,7 @@
|
|
10 |
"k": 64,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 2,
|
13 |
-
"lm_name": "
|
14 |
"wandb_name": "TopK-layer2-ef16-k64",
|
15 |
"submodule_name": null
|
16 |
},
|
|
|
10 |
"k": 64,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 2,
|
13 |
+
"lm_name": "mntss/Othello-GPT",
|
14 |
"wandb_name": "TopK-layer2-ef16-k64",
|
15 |
"submodule_name": null
|
16 |
},
|
resid_post/layer_2/trainer_13/config.json
CHANGED
@@ -10,7 +10,7 @@
|
|
10 |
"k": 128,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 2,
|
13 |
-
"lm_name": "
|
14 |
"wandb_name": "TopK-layer2-ef16-k128-resid_post",
|
15 |
"submodule_name": null,
|
16 |
"submodule_type": "resid_post"
|
|
|
10 |
"k": 128,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 2,
|
13 |
+
"lm_name": "mntss/Othello-GPT",
|
14 |
"wandb_name": "TopK-layer2-ef16-k128-resid_post",
|
15 |
"submodule_name": null,
|
16 |
"submodule_type": "resid_post"
|
resid_post/layer_2/trainer_14/config.json
CHANGED
@@ -10,7 +10,7 @@
|
|
10 |
"k": 128,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 2,
|
13 |
-
"lm_name": "
|
14 |
"wandb_name": "TopK-layer2-ef32-k128-resid_post",
|
15 |
"submodule_name": null,
|
16 |
"submodule_type": "resid_post"
|
|
|
10 |
"k": 128,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 2,
|
13 |
+
"lm_name": "mntss/Othello-GPT",
|
14 |
"wandb_name": "TopK-layer2-ef32-k128-resid_post",
|
15 |
"submodule_name": null,
|
16 |
"submodule_type": "resid_post"
|
resid_post/layer_2/trainer_3/config.json
CHANGED
@@ -10,7 +10,7 @@
|
|
10 |
"k": 16,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 2,
|
13 |
-
"lm_name": "
|
14 |
"wandb_name": "TopK-layer2-ef8-k16",
|
15 |
"submodule_name": null
|
16 |
},
|
|
|
10 |
"k": 16,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 2,
|
13 |
+
"lm_name": "mntss/Othello-GPT",
|
14 |
"wandb_name": "TopK-layer2-ef8-k16",
|
15 |
"submodule_name": null
|
16 |
},
|
resid_post/layer_2/trainer_4/config.json
CHANGED
@@ -10,7 +10,7 @@
|
|
10 |
"k": 32,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 2,
|
13 |
-
"lm_name": "
|
14 |
"wandb_name": "TopK-layer2-ef8-k32",
|
15 |
"submodule_name": null
|
16 |
},
|
|
|
10 |
"k": 32,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 2,
|
13 |
+
"lm_name": "mntss/Othello-GPT",
|
14 |
"wandb_name": "TopK-layer2-ef8-k32",
|
15 |
"submodule_name": null
|
16 |
},
|
resid_post/layer_2/trainer_5/config.json
CHANGED
@@ -10,7 +10,7 @@
|
|
10 |
"k": 64,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 2,
|
13 |
-
"lm_name": "
|
14 |
"wandb_name": "TopK-layer2-ef8-k64",
|
15 |
"submodule_name": null
|
16 |
},
|
|
|
10 |
"k": 64,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 2,
|
13 |
+
"lm_name": "mntss/Othello-GPT",
|
14 |
"wandb_name": "TopK-layer2-ef8-k64",
|
15 |
"submodule_name": null
|
16 |
},
|
resid_post/layer_2/trainer_6/config.json
CHANGED
@@ -10,7 +10,7 @@
|
|
10 |
"k": 128,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 2,
|
13 |
-
"lm_name": "
|
14 |
"wandb_name": "TopK-layer2-ef8-k128-resid_post",
|
15 |
"submodule_name": null,
|
16 |
"submodule_type": "resid_post"
|
|
|
10 |
"k": 128,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 2,
|
13 |
+
"lm_name": "mntss/Othello-GPT",
|
14 |
"wandb_name": "TopK-layer2-ef8-k128-resid_post",
|
15 |
"submodule_name": null,
|
16 |
"submodule_type": "resid_post"
|
resid_post/layer_3/trainer_10/config.json
CHANGED
@@ -10,7 +10,7 @@
|
|
10 |
"k": 16,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 3,
|
13 |
-
"lm_name": "
|
14 |
"wandb_name": "TopK-layer3-ef16-k16",
|
15 |
"submodule_name": null
|
16 |
},
|
|
|
10 |
"k": 16,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 3,
|
13 |
+
"lm_name": "mntss/Othello-GPT",
|
14 |
"wandb_name": "TopK-layer3-ef16-k16",
|
15 |
"submodule_name": null
|
16 |
},
|
resid_post/layer_3/trainer_11/config.json
CHANGED
@@ -10,7 +10,7 @@
|
|
10 |
"k": 32,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 3,
|
13 |
-
"lm_name": "
|
14 |
"wandb_name": "TopK-layer3-ef16-k32",
|
15 |
"submodule_name": null
|
16 |
},
|
|
|
10 |
"k": 32,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 3,
|
13 |
+
"lm_name": "mntss/Othello-GPT",
|
14 |
"wandb_name": "TopK-layer3-ef16-k32",
|
15 |
"submodule_name": null
|
16 |
},
|
resid_post/layer_3/trainer_12/config.json
CHANGED
@@ -10,7 +10,7 @@
|
|
10 |
"k": 64,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 3,
|
13 |
-
"lm_name": "
|
14 |
"wandb_name": "TopK-layer3-ef16-k64",
|
15 |
"submodule_name": null
|
16 |
},
|
|
|
10 |
"k": 64,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 3,
|
13 |
+
"lm_name": "mntss/Othello-GPT",
|
14 |
"wandb_name": "TopK-layer3-ef16-k64",
|
15 |
"submodule_name": null
|
16 |
},
|
resid_post/layer_3/trainer_13/config.json
CHANGED
@@ -10,7 +10,7 @@
|
|
10 |
"k": 128,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 3,
|
13 |
-
"lm_name": "
|
14 |
"wandb_name": "TopK-layer3-ef16-k128-resid_post",
|
15 |
"submodule_name": null,
|
16 |
"submodule_type": "resid_post"
|
|
|
10 |
"k": 128,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 3,
|
13 |
+
"lm_name": "mntss/Othello-GPT",
|
14 |
"wandb_name": "TopK-layer3-ef16-k128-resid_post",
|
15 |
"submodule_name": null,
|
16 |
"submodule_type": "resid_post"
|
resid_post/layer_3/trainer_14/config.json
CHANGED
@@ -10,7 +10,7 @@
|
|
10 |
"k": 128,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 3,
|
13 |
-
"lm_name": "
|
14 |
"wandb_name": "TopK-layer3-ef32-k128-resid_post",
|
15 |
"submodule_name": null,
|
16 |
"submodule_type": "resid_post"
|
|
|
10 |
"k": 128,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 3,
|
13 |
+
"lm_name": "mntss/Othello-GPT",
|
14 |
"wandb_name": "TopK-layer3-ef32-k128-resid_post",
|
15 |
"submodule_name": null,
|
16 |
"submodule_type": "resid_post"
|
resid_post/layer_3/trainer_3/config.json
CHANGED
@@ -10,7 +10,7 @@
|
|
10 |
"k": 16,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 3,
|
13 |
-
"lm_name": "
|
14 |
"wandb_name": "TopK-layer3-ef8-k16",
|
15 |
"submodule_name": null
|
16 |
},
|
|
|
10 |
"k": 16,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 3,
|
13 |
+
"lm_name": "mntss/Othello-GPT",
|
14 |
"wandb_name": "TopK-layer3-ef8-k16",
|
15 |
"submodule_name": null
|
16 |
},
|
resid_post/layer_3/trainer_4/config.json
CHANGED
@@ -10,7 +10,7 @@
|
|
10 |
"k": 32,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 3,
|
13 |
-
"lm_name": "
|
14 |
"wandb_name": "TopK-layer3-ef8-k32",
|
15 |
"submodule_name": null
|
16 |
},
|
|
|
10 |
"k": 32,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 3,
|
13 |
+
"lm_name": "mntss/Othello-GPT",
|
14 |
"wandb_name": "TopK-layer3-ef8-k32",
|
15 |
"submodule_name": null
|
16 |
},
|
resid_post/layer_3/trainer_5/config.json
CHANGED
@@ -10,7 +10,7 @@
|
|
10 |
"k": 64,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 3,
|
13 |
-
"lm_name": "
|
14 |
"wandb_name": "TopK-layer3-ef8-k64",
|
15 |
"submodule_name": null
|
16 |
},
|
|
|
10 |
"k": 64,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 3,
|
13 |
+
"lm_name": "mntss/Othello-GPT",
|
14 |
"wandb_name": "TopK-layer3-ef8-k64",
|
15 |
"submodule_name": null
|
16 |
},
|
resid_post/layer_3/trainer_6/config.json
CHANGED
@@ -10,7 +10,7 @@
|
|
10 |
"k": 128,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 3,
|
13 |
-
"lm_name": "
|
14 |
"wandb_name": "TopK-layer3-ef8-k128-resid_post",
|
15 |
"submodule_name": null,
|
16 |
"submodule_type": "resid_post"
|
|
|
10 |
"k": 128,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 3,
|
13 |
+
"lm_name": "mntss/Othello-GPT",
|
14 |
"wandb_name": "TopK-layer3-ef8-k128-resid_post",
|
15 |
"submodule_name": null,
|
16 |
"submodule_type": "resid_post"
|
resid_post/layer_4/trainer_10/config.json
CHANGED
@@ -10,7 +10,7 @@
|
|
10 |
"k": 16,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 4,
|
13 |
-
"lm_name": "
|
14 |
"wandb_name": "TopK-layer4-ef16-k16",
|
15 |
"submodule_name": null
|
16 |
},
|
|
|
10 |
"k": 16,
|
11 |
"device": "cuda:0",
|
12 |
"layer": 4,
|
13 |
+
"lm_name": "mntss/Othello-GPT",
|
14 |
"wandb_name": "TopK-layer4-ef16-k16",
|
15 |
"submodule_name": null
|
16 |
},
|