diff --git a/small_mlp_out/Mo0_N100_S-1.pt b/small_mlp_out/Mo0_N100_S-1.pt new file mode 100644 index 0000000000000000000000000000000000000000..e28f9850423622aef53a333628ad82276f7faeba --- /dev/null +++ b/small_mlp_out/Mo0_N100_S-1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af05b5086ef6459f0ba1f53ac36b1be4728306ad6e5717f8ccabad01a0c30a4d +size 619896 diff --git a/small_mlp_out/Mo0_N100_S-10.pt b/small_mlp_out/Mo0_N100_S-10.pt new file mode 100644 index 0000000000000000000000000000000000000000..2a58be4610776fe116c6eae7ca76247c33d1a334 --- /dev/null +++ b/small_mlp_out/Mo0_N100_S-10.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f8641706d85ae2322d6e897ba585c87597c45382716e00d231923e30f544486 +size 619904 diff --git a/small_mlp_out/Mo0_N100_S-10_config.json b/small_mlp_out/Mo0_N100_S-10_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f7f4017a585c93c1aeffa579946432b241234c9e --- /dev/null +++ b/small_mlp_out/Mo0_N100_S-10_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 100, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 0, + "l1_exp": -10, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo0_N100_S-10" +} \ No newline at end of file diff --git a/small_mlp_out/Mo0_N100_S-1_config.json b/small_mlp_out/Mo0_N100_S-1_config.json new file mode 100644 index 0000000000000000000000000000000000000000..caa3cfc27e70cec5692296e8da70e69c096f5ef8 --- /dev/null +++ b/small_mlp_out/Mo0_N100_S-1_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 100, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 0, + "l1_exp": -1, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo0_N100_S-1" +} \ No newline at end of file diff --git a/small_mlp_out/Mo0_N100_S-2.pt b/small_mlp_out/Mo0_N100_S-2.pt new file mode 100644 index 0000000000000000000000000000000000000000..63e31d14e334d2c6f233d1b2f3f065b6bba322fa --- /dev/null +++ b/small_mlp_out/Mo0_N100_S-2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25e8772eea4c3a267847387c189ca2688ad687b2ace325fa23df939b33c20794 +size 619896 diff --git a/small_mlp_out/Mo0_N100_S-2_config.json b/small_mlp_out/Mo0_N100_S-2_config.json new file mode 100644 index 0000000000000000000000000000000000000000..36431dd73b67e728f5522ed9f640a6007925f8ae --- /dev/null +++ b/small_mlp_out/Mo0_N100_S-2_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 100, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 0, + "l1_exp": -2, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo0_N100_S-2" +} \ No newline at end of file diff --git a/small_mlp_out/Mo0_N100_S-3.pt b/small_mlp_out/Mo0_N100_S-3.pt new file mode 100644 index 0000000000000000000000000000000000000000..00b325f9b956124b1f7894a7e4de4af344cb8104 --- /dev/null +++ b/small_mlp_out/Mo0_N100_S-3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1e7d311122c25a89e59cfe847d770db0b789336e3198783dddd726f8e699dc0 +size 619896 diff --git a/small_mlp_out/Mo0_N100_S-3_config.json b/small_mlp_out/Mo0_N100_S-3_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b39e02a35e0574ad320847043075cabda7958488 --- /dev/null +++ b/small_mlp_out/Mo0_N100_S-3_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 100, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 0, + "l1_exp": -3, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo0_N100_S-3" +} \ No newline at end of file diff --git a/small_mlp_out/Mo0_N100_S-4.pt b/small_mlp_out/Mo0_N100_S-4.pt new file mode 100644 index 0000000000000000000000000000000000000000..73deae7b264ccc27afdb7c559985cd27a1121fc2 --- /dev/null +++ b/small_mlp_out/Mo0_N100_S-4.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62ffe69e8783b55f4f602c0ed4932717af23e29541789f214407ce97050c58de +size 619896 diff --git a/small_mlp_out/Mo0_N100_S-4_config.json b/small_mlp_out/Mo0_N100_S-4_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d195f48817a0e6361ff94167e66ad34c452fa42b --- /dev/null +++ b/small_mlp_out/Mo0_N100_S-4_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 100, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 0, + "l1_exp": -4, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo0_N100_S-4" +} \ No newline at end of file diff --git a/small_mlp_out/Mo0_N100_S-5.pt b/small_mlp_out/Mo0_N100_S-5.pt new file mode 100644 index 0000000000000000000000000000000000000000..e0aeb3192fc3fe860511559cef6e68904401eec5 --- /dev/null +++ b/small_mlp_out/Mo0_N100_S-5.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cec97df2666e65b8ea931140c874d275d25cc7f843cd88d0071c54d5219f366d +size 619896 diff --git a/small_mlp_out/Mo0_N100_S-5_config.json b/small_mlp_out/Mo0_N100_S-5_config.json new file mode 100644 index 0000000000000000000000000000000000000000..38afc6f9eb181620ef791924674530f503ab47f0 --- /dev/null +++ b/small_mlp_out/Mo0_N100_S-5_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 100, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 0, + "l1_exp": -5, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo0_N100_S-5" +} \ No newline at end of file diff --git a/small_mlp_out/Mo0_N100_S-6.pt b/small_mlp_out/Mo0_N100_S-6.pt new file mode 100644 index 0000000000000000000000000000000000000000..7799f2ff4cb46587d0d89e66d6a5f64119554e00 --- /dev/null +++ b/small_mlp_out/Mo0_N100_S-6.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffa89c6746abe2576841458b4a77287285487450480bc56c059c21b31fa91e32 +size 619896 diff --git a/small_mlp_out/Mo0_N100_S-6_config.json b/small_mlp_out/Mo0_N100_S-6_config.json new file mode 100644 index 0000000000000000000000000000000000000000..700425a301545f657ec3c209f09e35ba70e37188 --- /dev/null +++ b/small_mlp_out/Mo0_N100_S-6_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 100, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 0, + "l1_exp": -6, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo0_N100_S-6" +} \ No newline at end of file diff --git a/small_mlp_out/Mo0_N100_S-7.pt b/small_mlp_out/Mo0_N100_S-7.pt new file mode 100644 index 0000000000000000000000000000000000000000..a59a56b84cc2f12613938a6b0fed806344e0f1da --- /dev/null +++ b/small_mlp_out/Mo0_N100_S-7.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c3076f2a9ae2b1fcea12b3850de9e0fdf02781d4e6b373b79f2e198ae7025fd +size 619896 diff --git a/small_mlp_out/Mo0_N100_S-7_config.json b/small_mlp_out/Mo0_N100_S-7_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a0bf8e63f97ffd4afa69ff117c3694a8dc87098b --- /dev/null +++ b/small_mlp_out/Mo0_N100_S-7_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 100, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 0, + "l1_exp": -7, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo0_N100_S-7" +} \ No newline at end of file diff --git a/small_mlp_out/Mo0_N100_S-8.pt b/small_mlp_out/Mo0_N100_S-8.pt new file mode 100644 index 0000000000000000000000000000000000000000..728aab717055a91f3f46f3e89afc4efa738f44ef --- /dev/null +++ b/small_mlp_out/Mo0_N100_S-8.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b321f175fcb8b0e62da6d3b26bd34ddde62c553de7d6e5add3cf45cde84fd7fe +size 619896 diff --git a/small_mlp_out/Mo0_N100_S-8_config.json b/small_mlp_out/Mo0_N100_S-8_config.json new file mode 100644 index 0000000000000000000000000000000000000000..abc1a5ea293a9a26366fea3a9b352671a62f076b --- /dev/null +++ b/small_mlp_out/Mo0_N100_S-8_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 100, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 0, + "l1_exp": -8, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo0_N100_S-8" +} \ No newline at end of file diff --git a/small_mlp_out/Mo0_N100_S-9.pt b/small_mlp_out/Mo0_N100_S-9.pt new file mode 100644 index 0000000000000000000000000000000000000000..dbffac4b56fec7f0eddfe84513c7c32e3b25845b --- /dev/null +++ b/small_mlp_out/Mo0_N100_S-9.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eac62f06d7682d0c0cccd5ad74fdf75dec9485843aed47877524d6adc970d9ef +size 619896 diff --git a/small_mlp_out/Mo0_N100_S-9_config.json b/small_mlp_out/Mo0_N100_S-9_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d62cde9d7884baed7334e2be89d4a51fe58516a4 --- /dev/null +++ b/small_mlp_out/Mo0_N100_S-9_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 100, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 0, + "l1_exp": -9, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo0_N100_S-9" +} \ No newline at end of file diff --git a/small_mlp_out/Mo0_N100_S0.pt b/small_mlp_out/Mo0_N100_S0.pt new file mode 100644 index 0000000000000000000000000000000000000000..8ec66b45a40609a3dc0339e3923090c03251da3f --- /dev/null +++ b/small_mlp_out/Mo0_N100_S0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c52f40ba9aa6bb249e2ea562de4a2db109a58ab592601051f17cd7c22bf8875 +size 619888 diff --git a/small_mlp_out/Mo0_N100_S0_config.json b/small_mlp_out/Mo0_N100_S0_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b60ab9fe1f574d97c711198eb529aa132b1024d2 --- /dev/null +++ b/small_mlp_out/Mo0_N100_S0_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 100, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 0, + "l1_exp": 0, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo0_N100_S0" +} \ No newline at end of file diff --git a/small_mlp_out/Mo0_N100_S1.pt b/small_mlp_out/Mo0_N100_S1.pt new file mode 100644 index 0000000000000000000000000000000000000000..9977c9fdb6e7b23b170043f73ae4a84777b74c4b --- /dev/null +++ b/small_mlp_out/Mo0_N100_S1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52538b9a86801cebb661e9db7fb2b94352e7cfe791324ef6a3c8fc3c2e98a645 +size 619888 diff --git a/small_mlp_out/Mo0_N100_S1_config.json b/small_mlp_out/Mo0_N100_S1_config.json new file mode 100644 index 0000000000000000000000000000000000000000..bf70a7edc28a553bd48f8353d11f58b8351abdc8 --- /dev/null +++ b/small_mlp_out/Mo0_N100_S1_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 100, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 0, + "l1_exp": 1, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo0_N100_S1" +} \ No newline at end of file diff --git a/small_mlp_out/Mo0_N100_S2.pt b/small_mlp_out/Mo0_N100_S2.pt new file mode 100644 index 0000000000000000000000000000000000000000..20d01a563882dd23e6942ae3f5ef609d9b93d3c3 --- /dev/null +++ b/small_mlp_out/Mo0_N100_S2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e4ae6de80c7c941677af277440b611e3fe5e4a29bcd6bc0845e909710e7d1fc +size 619888 diff --git a/small_mlp_out/Mo0_N100_S2_config.json b/small_mlp_out/Mo0_N100_S2_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9ee76483ed79872e2f05d94188e8c0d34d9a030b --- /dev/null +++ b/small_mlp_out/Mo0_N100_S2_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 100, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 0, + "l1_exp": 2, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo0_N100_S2" +} \ No newline at end of file diff --git a/small_mlp_out/Mo1_N100_S-1.pt b/small_mlp_out/Mo1_N100_S-1.pt new file mode 100644 index 0000000000000000000000000000000000000000..fdc785d789608ae32d15fb2de9fb1d3ed4273fa9 --- /dev/null +++ b/small_mlp_out/Mo1_N100_S-1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ae27e97e8cd3eddd1046adfbf85eca5a2fcf577240c58d511b567055490a656 +size 619896 diff --git a/small_mlp_out/Mo1_N100_S-10.pt b/small_mlp_out/Mo1_N100_S-10.pt new file mode 100644 index 0000000000000000000000000000000000000000..ca65e2fdfed2e2da72a94ecc63e0b0e784e8b4cf --- /dev/null +++ b/small_mlp_out/Mo1_N100_S-10.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89c6721b55d29dc0756d6cabd10d22bc88c0e151b372378b0b2aafd89785c262 +size 619904 diff --git a/small_mlp_out/Mo1_N100_S-10_config.json b/small_mlp_out/Mo1_N100_S-10_config.json new file mode 100644 index 0000000000000000000000000000000000000000..0662a81946eed8a09855f961c67f3b352577892e --- /dev/null +++ b/small_mlp_out/Mo1_N100_S-10_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 100, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 1, + "l1_exp": -10, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo1_N100_S-10" +} \ No newline at end of file diff --git a/small_mlp_out/Mo1_N100_S-1_config.json b/small_mlp_out/Mo1_N100_S-1_config.json new file mode 100644 index 0000000000000000000000000000000000000000..fbcbdefc7138ab2fef7f71479a68e2c1b50b2d33 --- /dev/null +++ b/small_mlp_out/Mo1_N100_S-1_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 100, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 1, + "l1_exp": -1, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo1_N100_S-1" +} \ No newline at end of file diff --git a/small_mlp_out/Mo1_N100_S-2.pt b/small_mlp_out/Mo1_N100_S-2.pt new file mode 100644 index 0000000000000000000000000000000000000000..b17ffcbce4977adde1534d408eb72fb1d25487e9 --- /dev/null +++ b/small_mlp_out/Mo1_N100_S-2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d36ccb1fc71fb2c1e8d25e6dee60a9077df13d97a5c76186e340233bb4a8a9e +size 619896 diff --git a/small_mlp_out/Mo1_N100_S-2_config.json b/small_mlp_out/Mo1_N100_S-2_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4be8095675dd57378d56a366f448a7ee9c94250d --- /dev/null +++ b/small_mlp_out/Mo1_N100_S-2_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 100, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 1, + "l1_exp": -2, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo1_N100_S-2" +} \ No newline at end of file diff --git a/small_mlp_out/Mo1_N100_S-3.pt b/small_mlp_out/Mo1_N100_S-3.pt new file mode 100644 index 0000000000000000000000000000000000000000..2e1324e2e6ad3a4d38ca25dddb0e3e9ad619475c --- /dev/null +++ b/small_mlp_out/Mo1_N100_S-3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab9ea0f87d47b3d050fb8f1d688da77e682eb9aaf3ff0eaa8516cd2af5b88a97 +size 619896 diff --git a/small_mlp_out/Mo1_N100_S-3_config.json b/small_mlp_out/Mo1_N100_S-3_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2929ca7b3c21ef350f92edba718077904a356619 --- /dev/null +++ b/small_mlp_out/Mo1_N100_S-3_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 100, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 1, + "l1_exp": -3, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo1_N100_S-3" +} \ No newline at end of file diff --git a/small_mlp_out/Mo1_N100_S-4.pt b/small_mlp_out/Mo1_N100_S-4.pt new file mode 100644 index 0000000000000000000000000000000000000000..f79a6ca147ac86bf55ba5f358d4679b58c1de919 --- /dev/null +++ b/small_mlp_out/Mo1_N100_S-4.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e611abec7d2d6bf4abce5bcc9accf787df7a75dca13694e77575f7dd7c6a364b +size 619896 diff --git a/small_mlp_out/Mo1_N100_S-4_config.json b/small_mlp_out/Mo1_N100_S-4_config.json new file mode 100644 index 0000000000000000000000000000000000000000..06b82d2bcface2ab893c3351b1640778468385cd --- /dev/null +++ b/small_mlp_out/Mo1_N100_S-4_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 100, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 1, + "l1_exp": -4, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo1_N100_S-4" +} \ No newline at end of file diff --git a/small_mlp_out/Mo1_N100_S-5.pt b/small_mlp_out/Mo1_N100_S-5.pt new file mode 100644 index 0000000000000000000000000000000000000000..370a4ade3c76357554317298dde8f9f64cc55f89 --- /dev/null +++ b/small_mlp_out/Mo1_N100_S-5.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1487f02437b087ca73fbe5efc06a24f83be478714b4bd39b75c76ba426d4d3c4 +size 619896 diff --git a/small_mlp_out/Mo1_N100_S-5_config.json b/small_mlp_out/Mo1_N100_S-5_config.json new file mode 100644 index 0000000000000000000000000000000000000000..21f6c7722dbcaa9cc9693968ab3d747182788e71 --- /dev/null +++ b/small_mlp_out/Mo1_N100_S-5_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 100, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 1, + "l1_exp": -5, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo1_N100_S-5" +} \ No newline at end of file diff --git a/small_mlp_out/Mo1_N100_S-6.pt b/small_mlp_out/Mo1_N100_S-6.pt new file mode 100644 index 0000000000000000000000000000000000000000..52098f173c6ae5e016fc4bea3ff8fb2d16944200 --- /dev/null +++ b/small_mlp_out/Mo1_N100_S-6.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64fea0f7373b118c234f57372682a6272637828245d439bce1489762188e88c3 +size 619896 diff --git a/small_mlp_out/Mo1_N100_S-6_config.json b/small_mlp_out/Mo1_N100_S-6_config.json new file mode 100644 index 0000000000000000000000000000000000000000..998ec9513118c248705c5746691ac486fb012997 --- /dev/null +++ b/small_mlp_out/Mo1_N100_S-6_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 100, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 1, + "l1_exp": -6, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo1_N100_S-6" +} \ No newline at end of file diff --git a/small_mlp_out/Mo1_N100_S-7.pt b/small_mlp_out/Mo1_N100_S-7.pt new file mode 100644 index 0000000000000000000000000000000000000000..5c010d020cf792e79828ef5228665acf2f8fc21d --- /dev/null +++ b/small_mlp_out/Mo1_N100_S-7.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69eb1cfdd040be79a5b5738a23fac9b88dff72af88656dfa99789ac1d7c7f5bb +size 619896 diff --git a/small_mlp_out/Mo1_N100_S-7_config.json b/small_mlp_out/Mo1_N100_S-7_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ffd02fdd0ddeec02009a1c4cb1fbf2c6fd126b02 --- /dev/null +++ b/small_mlp_out/Mo1_N100_S-7_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 100, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 1, + "l1_exp": -7, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo1_N100_S-7" +} \ No newline at end of file diff --git a/small_mlp_out/Mo1_N100_S-8.pt b/small_mlp_out/Mo1_N100_S-8.pt new file mode 100644 index 0000000000000000000000000000000000000000..376b2deff6a6b1e016e6967f2d7aa8bac42d6ef8 --- /dev/null +++ b/small_mlp_out/Mo1_N100_S-8.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad75daf6562638331d20c3f86fdb93917bf0a1aaf64c8c67acec709f1f335c04 +size 619896 diff --git a/small_mlp_out/Mo1_N100_S-8_config.json b/small_mlp_out/Mo1_N100_S-8_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3e819d0eebd92f1afa790044f42916a674fa9c61 --- /dev/null +++ b/small_mlp_out/Mo1_N100_S-8_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 100, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 1, + "l1_exp": -8, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo1_N100_S-8" +} \ No newline at end of file diff --git a/small_mlp_out/Mo1_N100_S-9.pt b/small_mlp_out/Mo1_N100_S-9.pt new file mode 100644 index 0000000000000000000000000000000000000000..ca843f541a0489389121d33264ffe7a651f7ff58 --- /dev/null +++ b/small_mlp_out/Mo1_N100_S-9.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a2b6cc9dbfbe4ec2f3519a8854b996c63051477dd94a690f416af53c8cab335 +size 619896 diff --git a/small_mlp_out/Mo1_N100_S-9_config.json b/small_mlp_out/Mo1_N100_S-9_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3d6e1acc8d528ed6f7337b8f86a0ec89908f709d --- /dev/null +++ b/small_mlp_out/Mo1_N100_S-9_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 100, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 1, + "l1_exp": -9, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo1_N100_S-9" +} \ No newline at end of file diff --git a/small_mlp_out/Mo1_N100_S0.pt b/small_mlp_out/Mo1_N100_S0.pt new file mode 100644 index 0000000000000000000000000000000000000000..ab90005215561319f56820fd6a64f8890da59085 --- /dev/null +++ b/small_mlp_out/Mo1_N100_S0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e842201d494a66f266a99bbf43ea5b03465873335a817094074267ab9e3dc857 +size 619888 diff --git a/small_mlp_out/Mo1_N100_S0_config.json b/small_mlp_out/Mo1_N100_S0_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c480190d8ac20cc441a77e7f8fe20708b834500e --- /dev/null +++ b/small_mlp_out/Mo1_N100_S0_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 100, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 1, + "l1_exp": 0, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo1_N100_S0" +} \ No newline at end of file diff --git a/small_mlp_out/Mo1_N100_S1.pt b/small_mlp_out/Mo1_N100_S1.pt new file mode 100644 index 0000000000000000000000000000000000000000..4b04e725f607ce81abb9fbaf84006aa437b30cf5 --- /dev/null +++ b/small_mlp_out/Mo1_N100_S1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8343acec8e1ee07400aa2376f4797783f35b7ec7a536b908b833b30c1ee63314 +size 619888 diff --git a/small_mlp_out/Mo1_N100_S1_config.json b/small_mlp_out/Mo1_N100_S1_config.json new file mode 100644 index 0000000000000000000000000000000000000000..545c902ebe12c3d3a268605232c0eab969a4a8df --- /dev/null +++ b/small_mlp_out/Mo1_N100_S1_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 100, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 1, + "l1_exp": 1, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo1_N100_S1" +} \ No newline at end of file diff --git a/small_mlp_out/Mo1_N100_S2.pt b/small_mlp_out/Mo1_N100_S2.pt new file mode 100644 index 0000000000000000000000000000000000000000..21f5a153ce0f83c1623455f7e58e8106eed45ed5 --- /dev/null +++ b/small_mlp_out/Mo1_N100_S2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:719d66a8889c59ac1b52dde6bf5cf61be95adb975301b06a7b1b52e96b7766c7 +size 619888 diff --git a/small_mlp_out/Mo1_N100_S2_config.json b/small_mlp_out/Mo1_N100_S2_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d50dc1032ea1597cd89a3c5d2e2120967b365887 --- /dev/null +++ b/small_mlp_out/Mo1_N100_S2_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 100, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 1, + "l1_exp": 2, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo1_N100_S2" +} \ No newline at end of file