End of training

Browse files

Files changed (7) hide show

README.md +5 -4
adapter_config.json +39 -0
adapter_model.safetensors +3 -0
all_results.json +5 -5
train_results.json +5 -5
trainer_state.json +120 -120
training_args.bin +2 -2

README.md CHANGED Viewed

@@ -1,5 +1,5 @@
 ---
-base_model: Gensyn/Qwen2.5-0.5B-Instruct
 library_name: transformers
 model_name: gensyn-checkpoints-cunning_padded_salmon
 tags:
@@ -8,13 +8,14 @@ tags:
 - grpo
 - gensyn
 - I am cunning padded salmon
 - trl
 licence: license
 ---
 # Model Card for gensyn-checkpoints-cunning_padded_salmon
-This model is a fine-tuned version of [Gensyn/Qwen2.5-0.5B-Instruct](https://huggingface.co/Gensyn/Qwen2.5-0.5B-Instruct).
 It has been trained using [TRL](https://github.com/huggingface/trl).
 ## Quick start
@@ -39,8 +40,8 @@ This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing
 - TRL: 0.15.2
 - Transformers: 4.51.3
-- Pytorch: 2.5.1
-- Datasets: 3.5.0
 - Tokenizers: 0.21.1
 ## Citations

 ---
+base_model: Gensyn/Qwen2.5-1.5B-Instruct
 library_name: transformers
 model_name: gensyn-checkpoints-cunning_padded_salmon
 tags:
 - grpo
 - gensyn
 - I am cunning padded salmon
+- unsloth
 - trl
 licence: license
 ---
 # Model Card for gensyn-checkpoints-cunning_padded_salmon
+This model is a fine-tuned version of [Gensyn/Qwen2.5-1.5B-Instruct](https://huggingface.co/Gensyn/Qwen2.5-1.5B-Instruct).
 It has been trained using [TRL](https://github.com/huggingface/trl).
 ## Quick start
 - TRL: 0.15.2
 - Transformers: 4.51.3
+- Pytorch: 2.6.0
+- Datasets: 3.6.0
 - Tokenizers: 0.21.1
 ## Citations

adapter_config.json ADDED Viewed

	@@ -0,0 +1,39 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "Gensyn/Qwen2.5-1.5B-Instruct",
+  "bias": "none",
+  "corda_config": null,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_bias": false,
+  "lora_dropout": 0,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj",
+    "o_proj",
+    "down_proj",
+    "k_proj",
+    "gate_proj",
+    "up_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_rslora": false
+}

adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2d1d11057d4e472335adc84e72daf635ef52935413ad483e0696280220835f51
+size 73911112

all_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
     "total_flos": 0.0,
-    "train_loss": 5.147978663444519e-06,
-    "train_runtime": 321.291,
-    "train_samples": 28,
-    "train_samples_per_second": 0.996,
-    "train_steps_per_second": 0.062
 }

 {
     "total_flos": 0.0,
+    "train_loss": 6.659178060974824e-07,
+    "train_runtime": 1202.0434,
+    "train_samples": 18,
+    "train_samples_per_second": 0.266,
+    "train_steps_per_second": 0.017
 }

train_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
     "total_flos": 0.0,
-    "train_loss": 5.147978663444519e-06,
-    "train_runtime": 321.291,
-    "train_samples": 28,
-    "train_samples_per_second": 0.996,
-    "train_steps_per_second": 0.062
 }

 {
     "total_flos": 0.0,
+    "train_loss": 6.659178060974824e-07,
+    "train_runtime": 1202.0434,
+    "train_samples": 18,
+    "train_samples_per_second": 0.266,
+    "train_steps_per_second": 0.017
 }

trainer_state.json CHANGED Viewed

@@ -2,7 +2,7 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 5.0,
   "eval_steps": 500,
   "global_step": 20,
   "is_hyper_param_search": false,
@@ -10,209 +10,209 @@
   "is_world_process_zero": true,
   "log_history": [
     {
-      "completion_length": 342.96875,
-      "epoch": 0.5714285714285714,
-      "grad_norm": 5.793670654296875,
       "kl": 0.0,
       "learning_rate": 5e-07,
       "loss": -0.0,
-      "reward": 2.103108564391732,
-      "reward_std": 1.210430834442377,
-      "rewards/concensus_correctness_reward_func": 0.31837500631809235,
-      "rewards/consensus_reward_func": 0.5625,
       "rewards/cumulative_reward_2": 0.0,
-      "rewards/final_correctness_reward_func": 0.0,
-      "rewards/question_recreation_reward_func": 0.538921034662053,
       "rewards/soft_format_reward_func": 0.0,
       "rewards/strict_format_reward_func": 0.0,
-      "rewards/xmlcount_reward_func": 0.6833125085104257,
       "step": 2
     },
     {
-      "completion_length": 395.7083333333333,
-      "epoch": 1.0,
-      "grad_norm": 3.316585063934326,
-      "kl": 0.001008606399409473,
       "learning_rate": 4.864543104251586e-07,
       "loss": 0.0,
-      "reward": 1.8855041489005089,
-      "reward_std": 1.0255667748861015,
-      "rewards/concensus_correctness_reward_func": 0.22416667143503824,
-      "rewards/consensus_reward_func": 0.25,
       "rewards/cumulative_reward_2": 0.0,
-      "rewards/final_correctness_reward_func": 0.3333333333333333,
-      "rewards/question_recreation_reward_func": 0.6617124912639459,
       "rewards/soft_format_reward_func": 0.0,
       "rewards/strict_format_reward_func": 0.0,
-      "rewards/xmlcount_reward_func": 0.41629168142875034,
       "step": 4
     },
     {
-      "completion_length": 314.9375,
-      "epoch": 1.5714285714285714,
-      "grad_norm": 9.464887619018555,
-      "kl": 0.0030845321743981913,
       "learning_rate": 4.472851273490984e-07,
       "loss": 0.0,
-      "reward": 2.451855756342411,
-      "reward_std": 1.917446899227798,
-      "rewards/concensus_correctness_reward_func": 0.5015000030398369,
-      "rewards/consensus_reward_func": 0.5,
       "rewards/cumulative_reward_2": 0.0,
-      "rewards/final_correctness_reward_func": 0.1875,
-      "rewards/question_recreation_reward_func": 0.616605784278363,
       "rewards/soft_format_reward_func": 0.0,
-      "rewards/strict_format_reward_func": 0.0,
-      "rewards/xmlcount_reward_func": 0.6462500058114529,
       "step": 6
     },
     {
-      "completion_length": 364.4583333333333,
-      "epoch": 2.0,
-      "grad_norm": 3.3119776248931885,
-      "kl": 0.004234734262960653,
       "learning_rate": 3.867370395306068e-07,
       "loss": 0.0,
-      "reward": 1.529069220026334,
-      "reward_std": 1.0711190002039075,
-      "rewards/concensus_correctness_reward_func": 0.17208333810170492,
-      "rewards/consensus_reward_func": 0.25,
       "rewards/cumulative_reward_2": 0.0,
-      "rewards/final_correctness_reward_func": 0.0,
-      "rewards/question_recreation_reward_func": 0.7534442593653997,
       "rewards/soft_format_reward_func": 0.0,
       "rewards/strict_format_reward_func": 0.0,
-      "rewards/xmlcount_reward_func": 0.35354167968034744,
       "step": 8
     },
     {
-      "completion_length": 305.3125,
-      "epoch": 2.571428571428571,
-      "grad_norm": 8.920665740966797,
-      "kl": 0.006920879794051871,
       "learning_rate": 3.1137137178519977e-07,
       "loss": 0.0,
-      "reward": 2.1907591819763184,
-      "reward_std": 0.7096769977360964,
-      "rewards/concensus_correctness_reward_func": 0.2839374989271164,
-      "rewards/consensus_reward_func": 0.4375,
       "rewards/cumulative_reward_2": 0.0,
-      "rewards/final_correctness_reward_func": 0.125,
-      "rewards/question_recreation_reward_func": 0.6164779150858521,
       "rewards/soft_format_reward_func": 0.0,
       "rewards/strict_format_reward_func": 0.0,
-      "rewards/xmlcount_reward_func": 0.7278437400236726,
       "step": 10
     },
     {
-      "completion_length": 272.625,
-      "epoch": 3.0,
-      "grad_norm": 3.3465256690979004,
-      "kl": 0.0063869420264381915,
       "learning_rate": 2.2935516363191693e-07,
       "loss": 0.0,
-      "reward": 2.122172156969706,
-      "reward_std": 0.5801232105586678,
-      "rewards/concensus_correctness_reward_func": 0.2299166719118754,
-      "rewards/consensus_reward_func": 0.3333333333333333,
       "rewards/cumulative_reward_2": 0.0,
-      "rewards/final_correctness_reward_func": 0.16666666666666666,
-      "rewards/question_recreation_reward_func": 0.6718804923196634,
       "rewards/soft_format_reward_func": 0.0,
       "rewards/strict_format_reward_func": 0.0,
-      "rewards/xmlcount_reward_func": 0.720374991496404,
       "step": 12
     },
     {
-      "completion_length": 354.84375,
-      "epoch": 3.571428571428571,
-      "grad_norm": 5.003468036651611,
-      "kl": 0.016760691243689507,
       "learning_rate": 1.4957614383675767e-07,
       "loss": 0.0,
-      "reward": 1.8695098906755447,
-      "reward_std": 1.2882013304333668,
-      "rewards/concensus_correctness_reward_func": 0.2758750058710575,
-      "rewards/consensus_reward_func": 0.375,
       "rewards/cumulative_reward_2": 0.0,
-      "rewards/final_correctness_reward_func": 0.125,
-      "rewards/question_recreation_reward_func": 0.5972911030985415,
       "rewards/soft_format_reward_func": 0.0,
-      "rewards/strict_format_reward_func": 0.0,
-      "rewards/xmlcount_reward_func": 0.4963437505066395,
       "step": 14
     },
     {
-      "completion_length": 503.0,
-      "epoch": 4.0,
-      "grad_norm": 4.424561023712158,
-      "kl": 0.0056549445337926345,
       "learning_rate": 8.067960709356478e-08,
       "loss": 0.0,
-      "reward": 1.4202980945507686,
-      "reward_std": 1.4158651794617374,
-      "rewards/concensus_correctness_reward_func": 0.17708333333333334,
       "rewards/consensus_reward_func": 0.4166666666666667,
       "rewards/cumulative_reward_2": 0.0,
-      "rewards/final_correctness_reward_func": 0.16666666666666666,
-      "rewards/question_recreation_reward_func": 0.5948813930153847,
       "rewards/soft_format_reward_func": 0.0,
-      "rewards/strict_format_reward_func": 0.0,
-      "rewards/xmlcount_reward_func": 0.06499999016523361,
       "step": 16
     },
     {
-      "completion_length": 401.0,
-      "epoch": 4.571428571428571,
-      "grad_norm": 5.2076849937438965,
-      "kl": 0.007878506650740746,
       "learning_rate": 3.013156219837776e-08,
       "loss": 0.0,
-      "reward": 2.6692092455923557,
-      "reward_std": 0.7308505216351477,
-      "rewards/concensus_correctness_reward_func": 0.48237501084804535,
-      "rewards/consensus_reward_func": 0.5,
       "rewards/cumulative_reward_2": 0.0,
       "rewards/final_correctness_reward_func": 0.25,
-      "rewards/question_recreation_reward_func": 0.6628029751591384,
       "rewards/soft_format_reward_func": 0.0,
-      "rewards/strict_format_reward_func": 0.0,
-      "rewards/xmlcount_reward_func": 0.7740312479436398,
       "step": 18
     },
     {
-      "completion_length": 262.9166666666667,
-      "epoch": 5.0,
-      "grad_norm": 3.2780354022979736,
-      "kl": 0.008317624412787458,
       "learning_rate": 3.4096741493194193e-09,
       "loss": 0.0,
-      "reward": 2.023281673590342,
-      "reward_std": 0.8200806056459745,
-      "rewards/concensus_correctness_reward_func": 0.09125000238418579,
-      "rewards/consensus_reward_func": 0.3333333333333333,
       "rewards/cumulative_reward_2": 0.0,
-      "rewards/final_correctness_reward_func": 0.0,
-      "rewards/question_recreation_reward_func": 0.6847400286545356,
       "rewards/soft_format_reward_func": 0.0,
-      "rewards/strict_format_reward_func": 0.0,
-      "rewards/xmlcount_reward_func": 0.9139583359162012,
       "step": 20
     },
     {
-      "epoch": 5.0,
       "step": 20,
       "total_flos": 0.0,
-      "train_loss": 5.147978663444519e-06,
-      "train_runtime": 321.291,
-      "train_samples_per_second": 0.996,
-      "train_steps_per_second": 0.062
     }
   ],
   "logging_steps": 2,
   "max_steps": 20,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 7,
   "save_steps": 25,
   "stateful_callbacks": {
     "TrainerControl": {
@@ -227,7 +227,7 @@
     }
   },
   "total_flos": 0.0,
-  "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null
 }

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 4.0,
   "eval_steps": 500,
   "global_step": 20,
   "is_hyper_param_search": false,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "completion_length": 442.625,
+      "epoch": 0.4444444444444444,
+      "grad_norm": 3.5712506771087646,
       "kl": 0.0,
       "learning_rate": 5e-07,
       "loss": -0.0,
+      "reward": 1.5570517890155315,
+      "reward_std": 1.2602613121271133,
+      "rewards/concensus_correctness_reward_func": 0.2447499930858612,
+      "rewards/consensus_reward_func": 0.125,
       "rewards/cumulative_reward_2": 0.0,
+      "rewards/final_correctness_reward_func": 0.3125,
+      "rewards/question_recreation_reward_func": 0.5743955131620169,
       "rewards/soft_format_reward_func": 0.0,
       "rewards/strict_format_reward_func": 0.0,
+      "rewards/xmlcount_reward_func": 0.30040625110268593,
       "step": 2
     },
     {
+      "completion_length": 303.4375,
+      "epoch": 0.8888888888888888,
+      "grad_norm": 0.8086029887199402,
+      "kl": 0.0008120334314298816,
       "learning_rate": 4.864543104251586e-07,
       "loss": 0.0,
+      "reward": 2.0302220284938812,
+      "reward_std": 2.375204414129257,
+      "rewards/concensus_correctness_reward_func": 0.7447500005364418,
+      "rewards/consensus_reward_func": 0.4375,
       "rewards/cumulative_reward_2": 0.0,
+      "rewards/final_correctness_reward_func": 0.25,
+      "rewards/question_recreation_reward_func": 0.4745032652281225,
       "rewards/soft_format_reward_func": 0.0,
       "rewards/strict_format_reward_func": 0.0,
+      "rewards/xmlcount_reward_func": 0.12346874875947833,
       "step": 4
     },
     {
+      "completion_length": 439.625,
+      "epoch": 1.2222222222222223,
+      "grad_norm": 0.520850419998169,
+      "kl": 0.0007262564128420005,
       "learning_rate": 4.472851273490984e-07,
       "loss": 0.0,
+      "reward": 1.7100990017255147,
+      "reward_std": 1.4609887500603993,
+      "rewards/concensus_correctness_reward_func": 0.13633333643277487,
+      "rewards/consensus_reward_func": 0.25,
       "rewards/cumulative_reward_2": 0.0,
+      "rewards/final_correctness_reward_func": 0.5,
+      "rewards/question_recreation_reward_func": 0.5773489971955618,
       "rewards/soft_format_reward_func": 0.0,
+      "rewards/strict_format_reward_func": 0.020833333333333332,
+      "rewards/xmlcount_reward_func": 0.2255833459397157,
       "step": 6
     },
     {
+      "completion_length": 376.0625,
+      "epoch": 1.6666666666666665,
+      "grad_norm": 0.5010330080986023,
+      "kl": 0.000765997203416191,
       "learning_rate": 3.867370395306068e-07,
       "loss": 0.0,
+      "reward": 1.1689440682530403,
+      "reward_std": 1.6103120371699333,
+      "rewards/concensus_correctness_reward_func": 0.1223749965429306,
+      "rewards/consensus_reward_func": 0.375,
       "rewards/cumulative_reward_2": 0.0,
+      "rewards/final_correctness_reward_func": 0.25,
+      "rewards/question_recreation_reward_func": 0.39806905947625637,
       "rewards/soft_format_reward_func": 0.0,
       "rewards/strict_format_reward_func": 0.0,
+      "rewards/xmlcount_reward_func": 0.023500001057982445,
       "step": 8
     },
     {
+      "completion_length": 487.2916666666667,
+      "epoch": 2.0,
+      "grad_norm": 2.3322558403015137,
+      "kl": 0.000808947918509754,
       "learning_rate": 3.1137137178519977e-07,
       "loss": 0.0,
+      "reward": 0.6228826468189558,
+      "reward_std": 0.9896061917146047,
+      "rewards/concensus_correctness_reward_func": -0.037249999741713204,
+      "rewards/consensus_reward_func": 0.25,
       "rewards/cumulative_reward_2": 0.0,
+      "rewards/final_correctness_reward_func": 0.0,
+      "rewards/question_recreation_reward_func": 0.3952576319376628,
       "rewards/soft_format_reward_func": 0.0,
       "rewards/strict_format_reward_func": 0.0,
+      "rewards/xmlcount_reward_func": 0.014874995996554693,
       "step": 10
     },
     {
+      "completion_length": 352.6875,
+      "epoch": 2.4444444444444446,
+      "grad_norm": 0.4322264790534973,
+      "kl": 0.0008172135276254267,
       "learning_rate": 2.2935516363191693e-07,
       "loss": 0.0,
+      "reward": 1.4260139390826225,
+      "reward_std": 0.7877290993928909,
+      "rewards/concensus_correctness_reward_func": 0.0,
+      "rewards/consensus_reward_func": 0.25,
       "rewards/cumulative_reward_2": 0.0,
+      "rewards/final_correctness_reward_func": 0.5625,
+      "rewards/question_recreation_reward_func": 0.48057645559310913,
       "rewards/soft_format_reward_func": 0.0,
       "rewards/strict_format_reward_func": 0.0,
+      "rewards/xmlcount_reward_func": 0.13293749885633588,
       "step": 12
     },
     {
+      "completion_length": 396.875,
+      "epoch": 2.888888888888889,
+      "grad_norm": 7.138157844543457,
+      "kl": 0.0008965203742263839,
       "learning_rate": 1.4957614383675767e-07,
       "loss": 0.0,
+      "reward": 1.3820095993578434,
+      "reward_std": 0.8838027436286211,
+      "rewards/concensus_correctness_reward_func": 0.011937499977648258,
+      "rewards/consensus_reward_func": 0.1875,
       "rewards/cumulative_reward_2": 0.0,
+      "rewards/final_correctness_reward_func": 0.3125,
+      "rewards/question_recreation_reward_func": 0.478915823623538,
       "rewards/soft_format_reward_func": 0.0,
+      "rewards/strict_format_reward_func": 0.03125,
+      "rewards/xmlcount_reward_func": 0.35990624874830246,
       "step": 14
     },
     {
+      "completion_length": 388.1666666666667,
+      "epoch": 3.2222222222222223,
+      "grad_norm": 0.5325118899345398,
+      "kl": 0.0008106547563026348,
       "learning_rate": 8.067960709356478e-08,
       "loss": 0.0,
+      "reward": 1.5594792763392131,
+      "reward_std": 1.1592264771461487,
+      "rewards/concensus_correctness_reward_func": 0.17558333401878676,
       "rewards/consensus_reward_func": 0.4166666666666667,
       "rewards/cumulative_reward_2": 0.0,
+      "rewards/final_correctness_reward_func": 0.25,
+      "rewards/question_recreation_reward_func": 0.4924376308917999,
       "rewards/soft_format_reward_func": 0.0,
+      "rewards/strict_format_reward_func": 0.041666666666666664,
+      "rewards/xmlcount_reward_func": 0.18312499672174454,
       "step": 16
     },
     {
+      "completion_length": 357.625,
+      "epoch": 3.6666666666666665,
+      "grad_norm": 0.5567435622215271,
+      "kl": 0.0008857284192345105,
       "learning_rate": 3.013156219837776e-08,
       "loss": 0.0,
+      "reward": 1.7179866097867489,
+      "reward_std": 1.5279813185334206,
+      "rewards/concensus_correctness_reward_func": 0.38906247541308403,
+      "rewards/consensus_reward_func": 0.3125,
       "rewards/cumulative_reward_2": 0.0,
       "rewards/final_correctness_reward_func": 0.25,
+      "rewards/question_recreation_reward_func": 0.481955349445343,
       "rewards/soft_format_reward_func": 0.0,
+      "rewards/strict_format_reward_func": 0.015625,
+      "rewards/xmlcount_reward_func": 0.26884375885128975,
       "step": 18
     },
     {
+      "completion_length": 408.7083333333333,
+      "epoch": 4.0,
+      "grad_norm": 0.40472686290740967,
+      "kl": 0.0008299552525083224,
       "learning_rate": 3.4096741493194193e-09,
       "loss": 0.0,
+      "reward": 0.7702692846457163,
+      "reward_std": 0.6618979672590891,
+      "rewards/concensus_correctness_reward_func": 0.0,
+      "rewards/consensus_reward_func": 0.08333333333333333,
       "rewards/cumulative_reward_2": 0.0,
+      "rewards/final_correctness_reward_func": 0.08333333333333333,
+      "rewards/question_recreation_reward_func": 0.33676928902665776,
       "rewards/soft_format_reward_func": 0.0,
+      "rewards/strict_format_reward_func": 0.020833333333333332,
+      "rewards/xmlcount_reward_func": 0.24600000927845636,
       "step": 20
     },
     {
+      "epoch": 4.0,
       "step": 20,
       "total_flos": 0.0,
+      "train_loss": 6.659178060974824e-07,
+      "train_runtime": 1202.0434,
+      "train_samples_per_second": 0.266,
+      "train_steps_per_second": 0.017
     }
   ],
   "logging_steps": 2,
   "max_steps": 20,
   "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
   "save_steps": 25,
   "stateful_callbacks": {
     "TrainerControl": {
     }
   },
   "total_flos": 0.0,
+  "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6691da7da90329f4391e740ee394783f31969fc984033caab38c654b88a80846
-size 5880

 version https://git-lfs.github.com/spec/v1
+oid sha256:9d1cb0d5321b1067e122a74129d957e48ea0e50a33e7d2efe8bc7e2c8521b632
+size 5944