End of training

Browse files

Files changed (9) hide show

README.md +2 -2
all_results.json +11 -11
eval_layer_index_29_mat_attn.attention.v_proj_results.json +9 -0
model-00001-of-00002.safetensors +2 -2
model.safetensors.index.json +13 -5
runs/Mar15_01-18-11_demo2/events.out.tfevents.1742042106.demo2.212607.2 +3 -0
runs/Mar15_01-18-11_demo2/events.out.tfevents.1742057348.demo2.212607.3 +3 -0
train_layer_index_29_mat_attn.attention.v_proj_results.json +8 -0
trainer_state.json +0 -0

README.md CHANGED Viewed

@@ -1,6 +1,6 @@
 ---
 base_model: LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct
-datasets: testing_layer_29_Matrix_mlp.c_proj
 library_name: transformers
 model_name: output_exaone_tucker_rank16_newdata_b1_g2_gradient1_e1_b512
 tags:
@@ -12,7 +12,7 @@ licence: license
 # Model Card for output_exaone_tucker_rank16_newdata_b1_g2_gradient1_e1_b512
-This model is a fine-tuned version of [LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct](https://huggingface.co/LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct) on the [testing_layer_29_Matrix_mlp.c_proj](https://huggingface.co/datasets/testing_layer_29_Matrix_mlp.c_proj) dataset.
 It has been trained using [TRL](https://github.com/huggingface/trl).
 ## Quick start

 ---
 base_model: LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct
+datasets: testing_layer_29_Matrix_attn.attention.v_proj
 library_name: transformers
 model_name: output_exaone_tucker_rank16_newdata_b1_g2_gradient1_e1_b512
 tags:
 # Model Card for output_exaone_tucker_rank16_newdata_b1_g2_gradient1_e1_b512
+This model is a fine-tuned version of [LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct](https://huggingface.co/LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct) on the [testing_layer_29_Matrix_attn.attention.v_proj](https://huggingface.co/datasets/testing_layer_29_Matrix_attn.attention.v_proj) dataset.
 It has been trained using [TRL](https://github.com/huggingface/trl).
 ## Quick start

all_results.json CHANGED Viewed

@@ -1,15 +1,15 @@
 {
-    "eval_accuracy": 0.8084207240251016,
-    "eval_loss": 1.6845703125,
-    "eval_runtime": 933.2948,
     "eval_samples": 25971,
-    "eval_samples_per_second": 27.827,
-    "eval_steps_per_second": 13.914,
-    "perplexity": 5.390134361325962,
-    "total_flos": 9.396165357758054e+17,
-    "train_loss": 1.722812063241433,
-    "train_runtime": 12082.0087,
     "train_samples": 294393,
-    "train_samples_per_second": 24.366,
-    "train_steps_per_second": 12.183
 }

 {
+    "eval_accuracy": 0.8090923506567009,
+    "eval_loss": 1.6630859375,
+    "eval_runtime": 949.3026,
     "eval_samples": 25971,
+    "eval_samples_per_second": 27.358,
+    "eval_steps_per_second": 13.68,
+    "perplexity": 5.2755658170209605,
+    "total_flos": 9.323659620390011e+17,
+    "train_loss": 1.669835741941364,
+    "train_runtime": 14265.6788,
     "train_samples": 294393,
+    "train_samples_per_second": 20.636,
+    "train_steps_per_second": 10.318
 }

eval_layer_index_29_mat_attn.attention.v_proj_results.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+    "eval_accuracy": 0.8090923506567009,
+    "eval_loss": 1.6630859375,
+    "eval_runtime": 949.3026,
+    "eval_samples": 25971,
+    "eval_samples_per_second": 27.358,
+    "eval_steps_per_second": 13.68,
+    "perplexity": 5.2755658170209605
+}

model-00001-of-00002.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3d7f8ac55165156d24fd68aee4367a9732cf51e950a3f1e760a2a14a26fae8da
-size 4701521288

 version https://git-lfs.github.com/spec/v1
+oid sha256:3dcf78f6714bde10266e3f9400e22a6573d0a01dbdef65f5e53d6b29dd7e1453
+size 4669288872

model.safetensors.index.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "metadata": {
-    "total_size": 5225777664
   },
   "weight_map": {
     "lm_head.weight": "model-00002-of-00002.safetensors",
@@ -202,10 +202,18 @@
     "transformer.h.28.mlp.c_fc_0.weight": "model-00001-of-00002.safetensors",
     "transformer.h.28.mlp.c_fc_1.weight": "model-00001-of-00002.safetensors",
     "transformer.h.28.mlp.c_proj.weight": "model-00001-of-00002.safetensors",
-    "transformer.h.29.attn.attention.k_proj.weight": "model-00001-of-00002.safetensors",
-    "transformer.h.29.attn.attention.out_proj.weight": "model-00001-of-00002.safetensors",
-    "transformer.h.29.attn.attention.q_proj.weight": "model-00001-of-00002.safetensors",
-    "transformer.h.29.attn.attention.v_proj.weight": "model-00001-of-00002.safetensors",
     "transformer.h.29.ln_1.weight": "model-00001-of-00002.safetensors",
     "transformer.h.29.ln_2.weight": "model-00001-of-00002.safetensors",
     "transformer.h.29.mlp.c_fc_0.tucker_core": "model-00001-of-00002.safetensors",

 {
   "metadata": {
+    "total_size": 5193544192
   },
   "weight_map": {
     "lm_head.weight": "model-00002-of-00002.safetensors",
     "transformer.h.28.mlp.c_fc_0.weight": "model-00001-of-00002.safetensors",
     "transformer.h.28.mlp.c_fc_1.weight": "model-00001-of-00002.safetensors",
     "transformer.h.28.mlp.c_proj.weight": "model-00001-of-00002.safetensors",
+    "transformer.h.29.attn.attention.k_proj.tucker_core": "model-00001-of-00002.safetensors",
+    "transformer.h.29.attn.attention.k_proj.tucker_factors.0": "model-00001-of-00002.safetensors",
+    "transformer.h.29.attn.attention.k_proj.tucker_factors.1": "model-00001-of-00002.safetensors",
+    "transformer.h.29.attn.attention.out_proj.tucker_core": "model-00001-of-00002.safetensors",
+    "transformer.h.29.attn.attention.out_proj.tucker_factors.0": "model-00001-of-00002.safetensors",
+    "transformer.h.29.attn.attention.out_proj.tucker_factors.1": "model-00001-of-00002.safetensors",
+    "transformer.h.29.attn.attention.q_proj.tucker_core": "model-00001-of-00002.safetensors",
+    "transformer.h.29.attn.attention.q_proj.tucker_factors.0": "model-00001-of-00002.safetensors",
+    "transformer.h.29.attn.attention.q_proj.tucker_factors.1": "model-00001-of-00002.safetensors",
+    "transformer.h.29.attn.attention.v_proj.tucker_core": "model-00001-of-00002.safetensors",
+    "transformer.h.29.attn.attention.v_proj.tucker_factors.0": "model-00001-of-00002.safetensors",
+    "transformer.h.29.attn.attention.v_proj.tucker_factors.1": "model-00001-of-00002.safetensors",
     "transformer.h.29.ln_1.weight": "model-00001-of-00002.safetensors",
     "transformer.h.29.ln_2.weight": "model-00001-of-00002.safetensors",
     "transformer.h.29.mlp.c_fc_0.tucker_core": "model-00001-of-00002.safetensors",

runs/Mar15_01-18-11_demo2/events.out.tfevents.1742042106.demo2.212607.2 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:31ecb633773d843b8760393649d5731486bb64a0998cdfe978cc4247e9f5b9c4
+size 88588

runs/Mar15_01-18-11_demo2/events.out.tfevents.1742057348.demo2.212607.3 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f33d18e169ebf196c3b52402913694d14174b4910c32ff5d56541b722c948ad2
+size 481

train_layer_index_29_mat_attn.attention.v_proj_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "total_flos": 9.323659620390011e+17,
+    "train_loss": 1.669835741941364,
+    "train_runtime": 14265.6788,
+    "train_samples": 294393,
+    "train_samples_per_second": 20.636,
+    "train_steps_per_second": 10.318
+}

trainer_state.json CHANGED Viewed

The diff for this file is too large to render. See raw diff