Training completed

Browse files

Files changed (4) hide show

README.md +7 -9
all_results.json +6 -6
train_results.json +6 -6
trainer_state.json +345 -30

README.md CHANGED Viewed

@@ -1,20 +1,20 @@
 ---
 library_name: peft
-license: apache-2.0
-base_model: Qwen/Qwen2.5-7B-Instruct
 tags:
 - generated_from_trainer
 model-index:
-- name: refactored-code-llama-3-2-3b
   results: []
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
-# refactored-code-llama-3-2-3b
-This model is a fine-tuned version of [Qwen/Qwen2.5-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-7B-Instruct) on an unknown dataset.
 ## Model description
@@ -38,14 +38,12 @@ The following hyperparameters were used during training:
 - eval_batch_size: 8
 - seed: 42
 - distributed_type: multi-GPU
-- num_devices: 8
 - gradient_accumulation_steps: 10
-- total_train_batch_size: 160
-- total_eval_batch_size: 64
 - optimizer: Use adamw_torch with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
 - lr_scheduler_type: cosine
 - lr_scheduler_warmup_ratio: 0.05
-- training_steps: 50
 ### Training results

 ---
 library_name: peft
+license: other
+base_model: Qwen/Qwen2.5-3B
 tags:
 - generated_from_trainer
 model-index:
+- name: single-node-single-gpu-qwen-custom-sft
   results: []
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
+# single-node-single-gpu-qwen-custom-sft
+This model is a fine-tuned version of [Qwen/Qwen2.5-3B](https://huggingface.co/Qwen/Qwen2.5-3B) on an unknown dataset.
 ## Model description
 - eval_batch_size: 8
 - seed: 42
 - distributed_type: multi-GPU
 - gradient_accumulation_steps: 10
+- total_train_batch_size: 20
 - optimizer: Use adamw_torch with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
 - lr_scheduler_type: cosine
 - lr_scheduler_warmup_ratio: 0.05
+- training_steps: 500
 ### Training results

all_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-    "epoch": 7.158730158730159,
-    "total_flos": 6.287430713700516e+17,
-    "train_loss": 0.912325621843338,
-    "train_runtime": 619.2374,
-    "train_samples_per_second": 12.919,
-    "train_steps_per_second": 0.081
 }

 {
+    "epoch": 0.5,
+    "total_flos": 6.856066495152128e+17,
+    "train_loss": 0.07727908698283135,
+    "train_runtime": 9003.3191,
+    "train_samples_per_second": 1.111,
+    "train_steps_per_second": 0.056
 }

train_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-    "epoch": 7.158730158730159,
-    "total_flos": 6.287430713700516e+17,
-    "train_loss": 0.912325621843338,
-    "train_runtime": 619.2374,
-    "train_samples_per_second": 12.919,
-    "train_steps_per_second": 0.081
 }

 {
+    "epoch": 0.5,
+    "total_flos": 6.856066495152128e+17,
+    "train_loss": 0.07727908698283135,
+    "train_runtime": 9003.3191,
+    "train_samples_per_second": 1.111,
+    "train_steps_per_second": 0.056
 }

trainer_state.json CHANGED Viewed

@@ -2,63 +2,378 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 7.158730158730159,
   "eval_steps": 0,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 1.4761904761904763,
-      "grad_norm": 0.34064996242523193,
-      "learning_rate": 0.0001,
-      "loss": 4.3632,
       "step": 10
     },
     {
-      "epoch": 2.9523809523809526,
-      "grad_norm": 0.1575096845626831,
-      "learning_rate": 0.0001,
-      "loss": 0.0554,
       "step": 20
     },
     {
-      "epoch": 4.317460317460317,
-      "grad_norm": 0.16478388011455536,
       "learning_rate": 0.0001,
-      "loss": 0.0546,
       "step": 30
     },
     {
-      "epoch": 5.7936507936507935,
-      "grad_norm": 0.140571728348732,
       "learning_rate": 0.0001,
-      "loss": 0.0479,
       "step": 40
     },
     {
-      "epoch": 7.158730158730159,
-      "grad_norm": 0.1423598974943161,
       "learning_rate": 0.0001,
-      "loss": 0.0405,
       "step": 50
     },
     {
-      "epoch": 7.158730158730159,
-      "step": 50,
-      "total_flos": 6.287430713700516e+17,
-      "train_loss": 0.912325621843338,
-      "train_runtime": 619.2374,
-      "train_samples_per_second": 12.919,
-      "train_steps_per_second": 0.081
     }
   ],
   "logging_steps": 10,
-  "max_steps": 50,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 9,
-  "save_steps": 50,
   "stateful_callbacks": {
     "TrainerControl": {
       "args": {
@@ -71,7 +386,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 6.287430713700516e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.5,
   "eval_steps": 0,
+  "global_step": 500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "epoch": 0.01,
+      "grad_norm": 6.734264373779297,
+      "learning_rate": 8.576691395183485e-05,
+      "loss": 3.5009,
       "step": 10
     },
     {
+      "epoch": 0.02,
+      "grad_norm": 0.29098451137542725,
+      "learning_rate": 9.653382790366966e-05,
+      "loss": 0.1027,
       "step": 20
     },
     {
+      "epoch": 0.03,
+      "grad_norm": 0.09549916535615921,
       "learning_rate": 0.0001,
+      "loss": 0.0191,
       "step": 30
     },
     {
+      "epoch": 0.04,
+      "grad_norm": 0.3484920263290405,
       "learning_rate": 0.0001,
+      "loss": 0.0222,
       "step": 40
     },
     {
+      "epoch": 0.05,
+      "grad_norm": 0.33001908659935,
       "learning_rate": 0.0001,
+      "loss": 0.021,
       "step": 50
     },
     {
+      "epoch": 0.06,
+      "grad_norm": 0.057511646300554276,
+      "learning_rate": 0.0001,
+      "loss": 0.0135,
+      "step": 60
+    },
+    {
+      "epoch": 0.07,
+      "grad_norm": 0.05701196566224098,
+      "learning_rate": 0.0001,
+      "loss": 0.0117,
+      "step": 70
+    },
+    {
+      "epoch": 0.08,
+      "grad_norm": 0.043988876044750214,
+      "learning_rate": 0.0001,
+      "loss": 0.0107,
+      "step": 80
+    },
+    {
+      "epoch": 0.09,
+      "grad_norm": 0.03720390796661377,
+      "learning_rate": 0.0001,
+      "loss": 0.0098,
+      "step": 90
+    },
+    {
+      "epoch": 0.1,
+      "grad_norm": 0.0470854677259922,
+      "learning_rate": 0.0001,
+      "loss": 0.0091,
+      "step": 100
+    },
+    {
+      "epoch": 0.11,
+      "grad_norm": 0.035510435700416565,
+      "learning_rate": 0.0001,
+      "loss": 0.0075,
+      "step": 110
+    },
+    {
+      "epoch": 0.12,
+      "grad_norm": 0.0346401073038578,
+      "learning_rate": 0.0001,
+      "loss": 0.0069,
+      "step": 120
+    },
+    {
+      "epoch": 0.13,
+      "grad_norm": 0.0329650416970253,
+      "learning_rate": 0.0001,
+      "loss": 0.0064,
+      "step": 130
+    },
+    {
+      "epoch": 0.14,
+      "grad_norm": 0.056529607623815536,
+      "learning_rate": 0.0001,
+      "loss": 0.0061,
+      "step": 140
+    },
+    {
+      "epoch": 0.15,
+      "grad_norm": 0.049417588859796524,
+      "learning_rate": 0.0001,
+      "loss": 0.0058,
+      "step": 150
+    },
+    {
+      "epoch": 0.16,
+      "grad_norm": 0.031275127083063126,
+      "learning_rate": 0.0001,
+      "loss": 0.0046,
+      "step": 160
+    },
+    {
+      "epoch": 0.17,
+      "grad_norm": 0.026077693328261375,
+      "learning_rate": 0.0001,
+      "loss": 0.0043,
+      "step": 170
+    },
+    {
+      "epoch": 0.18,
+      "grad_norm": 0.03110571764409542,
+      "learning_rate": 0.0001,
+      "loss": 0.0035,
+      "step": 180
+    },
+    {
+      "epoch": 0.19,
+      "grad_norm": 0.0256363395601511,
+      "learning_rate": 0.0001,
+      "loss": 0.0039,
+      "step": 190
+    },
+    {
+      "epoch": 0.2,
+      "grad_norm": 0.13061155378818512,
+      "learning_rate": 0.0001,
+      "loss": 0.0042,
+      "step": 200
+    },
+    {
+      "epoch": 0.21,
+      "grad_norm": 0.022342098876833916,
+      "learning_rate": 0.0001,
+      "loss": 0.0029,
+      "step": 210
+    },
+    {
+      "epoch": 0.22,
+      "grad_norm": 0.06658010929822922,
+      "learning_rate": 0.0001,
+      "loss": 0.0028,
+      "step": 220
+    },
+    {
+      "epoch": 0.23,
+      "grad_norm": 0.02203432098031044,
+      "learning_rate": 0.0001,
+      "loss": 0.0028,
+      "step": 230
+    },
+    {
+      "epoch": 0.24,
+      "grad_norm": 0.04879545792937279,
+      "learning_rate": 0.0001,
+      "loss": 0.0022,
+      "step": 240
+    },
+    {
+      "epoch": 0.25,
+      "grad_norm": 0.044768281280994415,
+      "learning_rate": 0.0001,
+      "loss": 0.0026,
+      "step": 250
+    },
+    {
+      "epoch": 0.26,
+      "grad_norm": 0.030401039868593216,
+      "learning_rate": 0.0001,
+      "loss": 0.0018,
+      "step": 260
+    },
+    {
+      "epoch": 0.27,
+      "grad_norm": 0.10380243510007858,
+      "learning_rate": 0.0001,
+      "loss": 0.0015,
+      "step": 270
+    },
+    {
+      "epoch": 0.28,
+      "grad_norm": 0.019732531160116196,
+      "learning_rate": 0.0001,
+      "loss": 0.0017,
+      "step": 280
+    },
+    {
+      "epoch": 0.29,
+      "grad_norm": 0.015292245894670486,
+      "learning_rate": 0.0001,
+      "loss": 0.0019,
+      "step": 290
+    },
+    {
+      "epoch": 0.3,
+      "grad_norm": 0.030675368383526802,
+      "learning_rate": 0.0001,
+      "loss": 0.0022,
+      "step": 300
+    },
+    {
+      "epoch": 0.31,
+      "grad_norm": 0.029702844098210335,
+      "learning_rate": 0.0001,
+      "loss": 0.0014,
+      "step": 310
+    },
+    {
+      "epoch": 0.32,
+      "grad_norm": 0.016342662274837494,
+      "learning_rate": 0.0001,
+      "loss": 0.0014,
+      "step": 320
+    },
+    {
+      "epoch": 0.33,
+      "grad_norm": 0.013499235734343529,
+      "learning_rate": 0.0001,
+      "loss": 0.0013,
+      "step": 330
+    },
+    {
+      "epoch": 0.34,
+      "grad_norm": 0.011413372121751308,
+      "learning_rate": 0.0001,
+      "loss": 0.0012,
+      "step": 340
+    },
+    {
+      "epoch": 0.35,
+      "grad_norm": 0.09215894341468811,
+      "learning_rate": 0.0001,
+      "loss": 0.0073,
+      "step": 350
+    },
+    {
+      "epoch": 0.36,
+      "grad_norm": 0.06609797477722168,
+      "learning_rate": 0.0001,
+      "loss": 0.0084,
+      "step": 360
+    },
+    {
+      "epoch": 0.37,
+      "grad_norm": 0.03970978036522865,
+      "learning_rate": 0.0001,
+      "loss": 0.0075,
+      "step": 370
+    },
+    {
+      "epoch": 0.38,
+      "grad_norm": 0.029625259339809418,
+      "learning_rate": 0.0001,
+      "loss": 0.0059,
+      "step": 380
+    },
+    {
+      "epoch": 0.39,
+      "grad_norm": 0.02456456422805786,
+      "learning_rate": 0.0001,
+      "loss": 0.005,
+      "step": 390
+    },
+    {
+      "epoch": 0.4,
+      "grad_norm": 0.03191933035850525,
+      "learning_rate": 0.0001,
+      "loss": 0.0045,
+      "step": 400
+    },
+    {
+      "epoch": 0.41,
+      "grad_norm": 0.01918269693851471,
+      "learning_rate": 0.0001,
+      "loss": 0.0037,
+      "step": 410
+    },
+    {
+      "epoch": 0.42,
+      "grad_norm": 0.018161766231060028,
+      "learning_rate": 0.0001,
+      "loss": 0.0031,
+      "step": 420
+    },
+    {
+      "epoch": 0.43,
+      "grad_norm": 0.019575210288167,
+      "learning_rate": 0.0001,
+      "loss": 0.0026,
+      "step": 430
+    },
+    {
+      "epoch": 0.44,
+      "grad_norm": 0.026317287236452103,
+      "learning_rate": 0.0001,
+      "loss": 0.0023,
+      "step": 440
+    },
+    {
+      "epoch": 0.45,
+      "grad_norm": 0.040029872208833694,
+      "learning_rate": 0.0001,
+      "loss": 0.0026,
+      "step": 450
+    },
+    {
+      "epoch": 0.46,
+      "grad_norm": 0.013975433073937893,
+      "learning_rate": 0.0001,
+      "loss": 0.0022,
+      "step": 460
+    },
+    {
+      "epoch": 0.47,
+      "grad_norm": 0.03210354968905449,
+      "learning_rate": 0.0001,
+      "loss": 0.0017,
+      "step": 470
+    },
+    {
+      "epoch": 0.48,
+      "grad_norm": 0.01889188587665558,
+      "learning_rate": 0.0001,
+      "loss": 0.0019,
+      "step": 480
+    },
+    {
+      "epoch": 0.49,
+      "grad_norm": 0.013832672499120235,
+      "learning_rate": 0.0001,
+      "loss": 0.0016,
+      "step": 490
+    },
+    {
+      "epoch": 0.5,
+      "grad_norm": 0.057756196707487106,
+      "learning_rate": 0.0001,
+      "loss": 0.0018,
+      "step": 500
+    },
+    {
+      "epoch": 0.5,
+      "step": 500,
+      "total_flos": 6.856066495152128e+17,
+      "train_loss": 0.07727908698283135,
+      "train_runtime": 9003.3191,
+      "train_samples_per_second": 1.111,
+      "train_steps_per_second": 0.056
     }
   ],
   "logging_steps": 10,
+  "max_steps": 500,
   "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 1000,
   "stateful_callbacks": {
     "TrainerControl": {
       "args": {
       "attributes": {}
     }
   },
+  "total_flos": 6.856066495152128e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null