georgeyw committed on Mar 14, 2024

Commit

47e28cc

verified ·

1 Parent(s): 3abe7ab

Training in progress, step 13000, checkpoint

Browse files

Files changed (30) hide show

checkpoint-12100/config.json +31 -0
checkpoint-12100/model.safetensors +3 -0
checkpoint-12100/training_args.bin +3 -0
checkpoint-12200/config.json +31 -0
checkpoint-12200/model.safetensors +3 -0
checkpoint-12200/training_args.bin +3 -0
checkpoint-12300/config.json +31 -0
checkpoint-12300/model.safetensors +3 -0
checkpoint-12300/training_args.bin +3 -0
checkpoint-12400/config.json +31 -0
checkpoint-12400/model.safetensors +3 -0
checkpoint-12400/training_args.bin +3 -0
checkpoint-12500/config.json +31 -0
checkpoint-12500/model.safetensors +3 -0
checkpoint-12500/training_args.bin +3 -0
checkpoint-12600/config.json +31 -0
checkpoint-12600/model.safetensors +3 -0
checkpoint-12600/training_args.bin +3 -0
checkpoint-12700/config.json +31 -0
checkpoint-12700/model.safetensors +3 -0
checkpoint-12700/training_args.bin +3 -0
checkpoint-12800/config.json +31 -0
checkpoint-12800/model.safetensors +3 -0
checkpoint-12800/training_args.bin +3 -0
checkpoint-12900/config.json +31 -0
checkpoint-12900/model.safetensors +3 -0
checkpoint-12900/training_args.bin +3 -0
checkpoint-13000/config.json +31 -0
checkpoint-13000/model.safetensors +3 -0
checkpoint-13000/training_args.bin +3 -0

checkpoint-12100/config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "_name_or_path": "georgeyw/gpt-2-small-init-seed-5",
+  "architectures": [
+    "GPTNeoXForCausalLM"
+  ],
+  "attention_bias": true,
+  "attention_dropout": 0.0,
+  "bos_token_id": 0,
+  "classifier_dropout": 0.1,
+  "eos_token_id": 2,
+  "hidden_act": "gelu",
+  "hidden_dropout": 0.0,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-05,
+  "layer_norm_epsilon": 1e-05,
+  "max_position_embeddings": 1024,
+  "model_type": "gpt_neox",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "rope_scaling": null,
+  "rotary_emb_base": 10000,
+  "rotary_pct": 0.25,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.38.2",
+  "use_cache": true,
+  "use_parallel_residual": true,
+  "vocab_size": 50304
+}

checkpoint-12100/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c0f555b874cb9ebe975f93427a26c60d3c60d01e1728c0fb96cc6c0e9f7f5893
+size 324662984

checkpoint-12100/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:de3f522b6b895157d4ae37816ea2b39e4b24555bc3782f9f18492c6709abd779
+size 6520

checkpoint-12200/config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "_name_or_path": "georgeyw/gpt-2-small-init-seed-5",
+  "architectures": [
+    "GPTNeoXForCausalLM"
+  ],
+  "attention_bias": true,
+  "attention_dropout": 0.0,
+  "bos_token_id": 0,
+  "classifier_dropout": 0.1,
+  "eos_token_id": 2,
+  "hidden_act": "gelu",
+  "hidden_dropout": 0.0,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-05,
+  "layer_norm_epsilon": 1e-05,
+  "max_position_embeddings": 1024,
+  "model_type": "gpt_neox",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "rope_scaling": null,
+  "rotary_emb_base": 10000,
+  "rotary_pct": 0.25,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.38.2",
+  "use_cache": true,
+  "use_parallel_residual": true,
+  "vocab_size": 50304
+}

checkpoint-12200/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9591cffbdd95ec607ecbc952f4137a66f4dcf1400c10faca3bb23269cd709e20
+size 324662984

checkpoint-12200/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:de3f522b6b895157d4ae37816ea2b39e4b24555bc3782f9f18492c6709abd779
+size 6520

checkpoint-12300/config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "_name_or_path": "georgeyw/gpt-2-small-init-seed-5",
+  "architectures": [
+    "GPTNeoXForCausalLM"
+  ],
+  "attention_bias": true,
+  "attention_dropout": 0.0,
+  "bos_token_id": 0,
+  "classifier_dropout": 0.1,
+  "eos_token_id": 2,
+  "hidden_act": "gelu",
+  "hidden_dropout": 0.0,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-05,
+  "layer_norm_epsilon": 1e-05,
+  "max_position_embeddings": 1024,
+  "model_type": "gpt_neox",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "rope_scaling": null,
+  "rotary_emb_base": 10000,
+  "rotary_pct": 0.25,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.38.2",
+  "use_cache": true,
+  "use_parallel_residual": true,
+  "vocab_size": 50304
+}

checkpoint-12300/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8bf9296e12037b21cd7362c8c28ea7efed896a089a93af298f56300e15163d88
+size 324662984

checkpoint-12300/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:de3f522b6b895157d4ae37816ea2b39e4b24555bc3782f9f18492c6709abd779
+size 6520

checkpoint-12400/config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "_name_or_path": "georgeyw/gpt-2-small-init-seed-5",
+  "architectures": [
+    "GPTNeoXForCausalLM"
+  ],
+  "attention_bias": true,
+  "attention_dropout": 0.0,
+  "bos_token_id": 0,
+  "classifier_dropout": 0.1,
+  "eos_token_id": 2,
+  "hidden_act": "gelu",
+  "hidden_dropout": 0.0,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-05,
+  "layer_norm_epsilon": 1e-05,
+  "max_position_embeddings": 1024,
+  "model_type": "gpt_neox",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "rope_scaling": null,
+  "rotary_emb_base": 10000,
+  "rotary_pct": 0.25,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.38.2",
+  "use_cache": true,
+  "use_parallel_residual": true,
+  "vocab_size": 50304
+}

checkpoint-12400/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c141e5492e8190c5f867d1857ae792a154ac66ceaec872cd8a0d73b1ff40578c
+size 324662984

checkpoint-12400/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:de3f522b6b895157d4ae37816ea2b39e4b24555bc3782f9f18492c6709abd779
+size 6520

checkpoint-12500/config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "_name_or_path": "georgeyw/gpt-2-small-init-seed-5",
+  "architectures": [
+    "GPTNeoXForCausalLM"
+  ],
+  "attention_bias": true,
+  "attention_dropout": 0.0,
+  "bos_token_id": 0,
+  "classifier_dropout": 0.1,
+  "eos_token_id": 2,
+  "hidden_act": "gelu",
+  "hidden_dropout": 0.0,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-05,
+  "layer_norm_epsilon": 1e-05,
+  "max_position_embeddings": 1024,
+  "model_type": "gpt_neox",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "rope_scaling": null,
+  "rotary_emb_base": 10000,
+  "rotary_pct": 0.25,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.38.2",
+  "use_cache": true,
+  "use_parallel_residual": true,
+  "vocab_size": 50304
+}

checkpoint-12500/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d71096620aca34647e9f6bc9576976765b133a2c5bd11e24364613b84bc2bc1c
+size 324662984

checkpoint-12500/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:de3f522b6b895157d4ae37816ea2b39e4b24555bc3782f9f18492c6709abd779
+size 6520

checkpoint-12600/config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "_name_or_path": "georgeyw/gpt-2-small-init-seed-5",
+  "architectures": [
+    "GPTNeoXForCausalLM"
+  ],
+  "attention_bias": true,
+  "attention_dropout": 0.0,
+  "bos_token_id": 0,
+  "classifier_dropout": 0.1,
+  "eos_token_id": 2,
+  "hidden_act": "gelu",
+  "hidden_dropout": 0.0,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-05,
+  "layer_norm_epsilon": 1e-05,
+  "max_position_embeddings": 1024,
+  "model_type": "gpt_neox",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "rope_scaling": null,
+  "rotary_emb_base": 10000,
+  "rotary_pct": 0.25,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.38.2",
+  "use_cache": true,
+  "use_parallel_residual": true,
+  "vocab_size": 50304
+}

checkpoint-12600/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a339a8eba99999dcee345e15b46ccb98a9ef023bcdf5b84d22c026a19e9ade92
+size 324662984

checkpoint-12600/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:de3f522b6b895157d4ae37816ea2b39e4b24555bc3782f9f18492c6709abd779
+size 6520

checkpoint-12700/config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "_name_or_path": "georgeyw/gpt-2-small-init-seed-5",
+  "architectures": [
+    "GPTNeoXForCausalLM"
+  ],
+  "attention_bias": true,
+  "attention_dropout": 0.0,
+  "bos_token_id": 0,
+  "classifier_dropout": 0.1,
+  "eos_token_id": 2,
+  "hidden_act": "gelu",
+  "hidden_dropout": 0.0,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-05,
+  "layer_norm_epsilon": 1e-05,
+  "max_position_embeddings": 1024,
+  "model_type": "gpt_neox",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "rope_scaling": null,
+  "rotary_emb_base": 10000,
+  "rotary_pct": 0.25,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.38.2",
+  "use_cache": true,
+  "use_parallel_residual": true,
+  "vocab_size": 50304
+}

checkpoint-12700/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:05fc4ea5115ae015332fe9f2ca20febd78a80356a325b40581d745255fc8d658
+size 324662984

checkpoint-12700/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:de3f522b6b895157d4ae37816ea2b39e4b24555bc3782f9f18492c6709abd779
+size 6520

checkpoint-12800/config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "_name_or_path": "georgeyw/gpt-2-small-init-seed-5",
+  "architectures": [
+    "GPTNeoXForCausalLM"
+  ],
+  "attention_bias": true,
+  "attention_dropout": 0.0,
+  "bos_token_id": 0,
+  "classifier_dropout": 0.1,
+  "eos_token_id": 2,
+  "hidden_act": "gelu",
+  "hidden_dropout": 0.0,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-05,
+  "layer_norm_epsilon": 1e-05,
+  "max_position_embeddings": 1024,
+  "model_type": "gpt_neox",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "rope_scaling": null,
+  "rotary_emb_base": 10000,
+  "rotary_pct": 0.25,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.38.2",
+  "use_cache": true,
+  "use_parallel_residual": true,
+  "vocab_size": 50304
+}

checkpoint-12800/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4592f43e8713a9ac8c4baff25e76cd2afb06cc812dda27e89d1a23d88c510f7f
+size 324662984

checkpoint-12800/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:de3f522b6b895157d4ae37816ea2b39e4b24555bc3782f9f18492c6709abd779
+size 6520

checkpoint-12900/config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "_name_or_path": "georgeyw/gpt-2-small-init-seed-5",
+  "architectures": [
+    "GPTNeoXForCausalLM"
+  ],
+  "attention_bias": true,
+  "attention_dropout": 0.0,
+  "bos_token_id": 0,
+  "classifier_dropout": 0.1,
+  "eos_token_id": 2,
+  "hidden_act": "gelu",
+  "hidden_dropout": 0.0,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-05,
+  "layer_norm_epsilon": 1e-05,
+  "max_position_embeddings": 1024,
+  "model_type": "gpt_neox",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "rope_scaling": null,
+  "rotary_emb_base": 10000,
+  "rotary_pct": 0.25,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.38.2",
+  "use_cache": true,
+  "use_parallel_residual": true,
+  "vocab_size": 50304
+}

checkpoint-12900/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8ca667d2f2ac362968d2737801a46f90fdd220f03b4ef385fcd4dd6edfb79ee5
+size 324662984

checkpoint-12900/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:de3f522b6b895157d4ae37816ea2b39e4b24555bc3782f9f18492c6709abd779
+size 6520

checkpoint-13000/config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "_name_or_path": "georgeyw/gpt-2-small-init-seed-5",
+  "architectures": [
+    "GPTNeoXForCausalLM"
+  ],
+  "attention_bias": true,
+  "attention_dropout": 0.0,
+  "bos_token_id": 0,
+  "classifier_dropout": 0.1,
+  "eos_token_id": 2,
+  "hidden_act": "gelu",
+  "hidden_dropout": 0.0,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-05,
+  "layer_norm_epsilon": 1e-05,
+  "max_position_embeddings": 1024,
+  "model_type": "gpt_neox",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "rope_scaling": null,
+  "rotary_emb_base": 10000,
+  "rotary_pct": 0.25,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.38.2",
+  "use_cache": true,
+  "use_parallel_residual": true,
+  "vocab_size": 50304
+}

checkpoint-13000/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:44778d96aeb424090bf5e7f13ea42fc282757ad8ad02581fb812527e0c0efa1a
+size 324662984

checkpoint-13000/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:de3f522b6b895157d4ae37816ea2b39e4b24555bc3782f9f18492c6709abd779
+size 6520