End of training

Browse files

Files changed (7) hide show

README.md +5 -4
adapter_config.json +39 -0
adapter_model.safetensors +3 -0
all_results.json +5 -5
train_results.json +5 -5
trainer_state.json +127 -127
training_args.bin +2 -2

README.md CHANGED Viewed

@@ -1,5 +1,5 @@
 ---
-base_model: Gensyn/Qwen2.5-0.5B-Instruct
 library_name: transformers
 model_name: gensyn-checkpoints-lazy_beaked_camel
 tags:
@@ -8,13 +8,14 @@ tags:
 - grpo
 - gensyn
 - I am lazy beaked camel
 - trl
 licence: license
 ---
 # Model Card for gensyn-checkpoints-lazy_beaked_camel
-This model is a fine-tuned version of [Gensyn/Qwen2.5-0.5B-Instruct](https://huggingface.co/Gensyn/Qwen2.5-0.5B-Instruct).
 It has been trained using [TRL](https://github.com/huggingface/trl).
 ## Quick start
@@ -39,8 +40,8 @@ This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing
 - TRL: 0.15.2
 - Transformers: 4.51.3
-- Pytorch: 2.5.1
-- Datasets: 3.5.0
 - Tokenizers: 0.21.1
 ## Citations

 ---
+base_model: Gensyn/Qwen2.5-1.5B-Instruct
 library_name: transformers
 model_name: gensyn-checkpoints-lazy_beaked_camel
 tags:
 - grpo
 - gensyn
 - I am lazy beaked camel
+- unsloth
 - trl
 licence: license
 ---
 # Model Card for gensyn-checkpoints-lazy_beaked_camel
+This model is a fine-tuned version of [Gensyn/Qwen2.5-1.5B-Instruct](https://huggingface.co/Gensyn/Qwen2.5-1.5B-Instruct).
 It has been trained using [TRL](https://github.com/huggingface/trl).
 ## Quick start
 - TRL: 0.15.2
 - Transformers: 4.51.3
+- Pytorch: 2.6.0
+- Datasets: 3.6.0
 - Tokenizers: 0.21.1
 ## Citations

adapter_config.json ADDED Viewed

	@@ -0,0 +1,39 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "Gensyn/Qwen2.5-1.5B-Instruct",
+  "bias": "none",
+  "corda_config": null,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_bias": false,
+  "lora_dropout": 0,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "k_proj",
+    "o_proj",
+    "up_proj",
+    "down_proj",
+    "q_proj",
+    "v_proj",
+    "gate_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_rslora": false
+}

adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6ed8b705d27bb7f2a02f036484db3457828de98a6d698507a03e3901a531762a
+size 73911112

all_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
     "total_flos": 0.0,
-    "train_loss": 9.27983032852353e-06,
-    "train_runtime": 523.7046,
-    "train_samples": 28,
-    "train_samples_per_second": 0.611,
-    "train_steps_per_second": 0.038
 }

 {
     "total_flos": 0.0,
+    "train_loss": 8.800688064525275e-07,
+    "train_runtime": 1536.4028,
+    "train_samples": 45,
+    "train_samples_per_second": 0.208,
+    "train_steps_per_second": 0.013
 }

train_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
     "total_flos": 0.0,
-    "train_loss": 9.27983032852353e-06,
-    "train_runtime": 523.7046,
-    "train_samples": 28,
-    "train_samples_per_second": 0.611,
-    "train_steps_per_second": 0.038
 }

 {
     "total_flos": 0.0,
+    "train_loss": 8.800688064525275e-07,
+    "train_runtime": 1536.4028,
+    "train_samples": 45,
+    "train_samples_per_second": 0.208,
+    "train_steps_per_second": 0.013
 }

trainer_state.json CHANGED Viewed

@@ -2,7 +2,7 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 5.0,
   "eval_steps": 500,
   "global_step": 20,
   "is_hyper_param_search": false,
@@ -10,209 +10,209 @@
   "is_world_process_zero": true,
   "log_history": [
     {
-      "completion_length": 382.90625,
-      "epoch": 0.5714285714285714,
-      "grad_norm": 7.019376277923584,
       "kl": 0.0,
       "learning_rate": 5e-07,
       "loss": -0.0,
-      "reward": 1.4931294089183211,
-      "reward_std": 0.9099325076094829,
-      "rewards/concensus_correctness_reward_func": 0.13106250017881393,
-      "rewards/consensus_reward_func": 0.125,
       "rewards/cumulative_reward_2": 0.0,
-      "rewards/final_correctness_reward_func": 0.125,
-      "rewards/question_recreation_reward_func": 0.4232856703456491,
       "rewards/soft_format_reward_func": 0.0,
-      "rewards/strict_format_reward_func": 0.0,
-      "rewards/xmlcount_reward_func": 0.6887812446802855,
       "step": 2
     },
     {
-      "completion_length": 274.8333333333333,
-      "epoch": 1.0,
-      "grad_norm": 4.1338043212890625,
-      "kl": 0.0020763344461253532,
       "learning_rate": 4.864543104251586e-07,
       "loss": 0.0,
-      "reward": 2.677121857802073,
-      "reward_std": 1.935415767133236,
-      "rewards/concensus_correctness_reward_func": 0.9560000002384186,
-      "rewards/consensus_reward_func": 0.16666666666666666,
       "rewards/cumulative_reward_2": 0.0,
-      "rewards/final_correctness_reward_func": 0.16666666666666666,
-      "rewards/question_recreation_reward_func": 0.5650802229841551,
       "rewards/soft_format_reward_func": 0.0,
-      "rewards/strict_format_reward_func": 0.0,
-      "rewards/xmlcount_reward_func": 0.822708343466123,
       "step": 4
     },
     {
-      "completion_length": 374.625,
-      "epoch": 1.5714285714285714,
-      "grad_norm": 6.631983280181885,
-      "kl": 0.004001049754151609,
       "learning_rate": 4.472851273490984e-07,
       "loss": 0.0,
-      "reward": 1.7044404447078705,
-      "reward_std": 0.9565547136589885,
-      "rewards/concensus_correctness_reward_func": 0.11406249925494194,
-      "rewards/consensus_reward_func": 0.125,
       "rewards/cumulative_reward_2": 0.0,
-      "rewards/final_correctness_reward_func": 0.125,
-      "rewards/question_recreation_reward_func": 0.5176279472652823,
       "rewards/soft_format_reward_func": 0.0,
-      "rewards/strict_format_reward_func": 0.0,
-      "rewards/xmlcount_reward_func": 0.8227499909698963,
       "step": 6
     },
     {
-      "completion_length": 322.0833333333333,
-      "epoch": 2.0,
-      "grad_norm": 5.165700435638428,
-      "kl": 0.011109078186564147,
       "learning_rate": 3.867370395306068e-07,
       "loss": 0.0,
-      "reward": 1.7368124773104985,
-      "reward_std": 0.3253405654492478,
-      "rewards/concensus_correctness_reward_func": 0.010416666666666666,
-      "rewards/consensus_reward_func": 0.08333333333333333,
       "rewards/cumulative_reward_2": 0.0,
-      "rewards/final_correctness_reward_func": 0.3333333333333333,
-      "rewards/question_recreation_reward_func": 0.5206041888644298,
       "rewards/soft_format_reward_func": 0.0,
-      "rewards/strict_format_reward_func": 0.0,
-      "rewards/xmlcount_reward_func": 0.7891250103712082,
       "step": 8
     },
     {
-      "completion_length": 449.0625,
-      "epoch": 2.571428571428571,
-      "grad_norm": 6.443226337432861,
-      "kl": 0.010882849674089812,
       "learning_rate": 3.1137137178519977e-07,
       "loss": 0.0,
-      "reward": 1.7125880680978298,
-      "reward_std": 1.2546559358015656,
-      "rewards/concensus_correctness_reward_func": 0.26981249637901783,
-      "rewards/consensus_reward_func": 0.1875,
       "rewards/cumulative_reward_2": 0.0,
-      "rewards/final_correctness_reward_func": 0.125,
-      "rewards/question_recreation_reward_func": 0.5314630657667294,
       "rewards/soft_format_reward_func": 0.0,
-      "rewards/strict_format_reward_func": 0.0,
-      "rewards/xmlcount_reward_func": 0.5988124869763851,
       "step": 10
     },
     {
-      "completion_length": 404.625,
-      "epoch": 3.0,
-      "grad_norm": 4.660242557525635,
-      "kl": 0.01321633932335923,
       "learning_rate": 2.2935516363191693e-07,
       "loss": 0.0,
-      "reward": 1.678330086171627,
-      "reward_std": 1.2830108457322542,
-      "rewards/concensus_correctness_reward_func": 0.1132500022649765,
-      "rewards/consensus_reward_func": 0.25,
       "rewards/cumulative_reward_2": 0.0,
-      "rewards/final_correctness_reward_func": 0.16666666666666666,
-      "rewards/question_recreation_reward_func": 0.6727051039536794,
       "rewards/soft_format_reward_func": 0.0,
-      "rewards/strict_format_reward_func": 0.0,
-      "rewards/xmlcount_reward_func": 0.47570833563804626,
       "step": 12
     },
     {
-      "completion_length": 342.59375,
-      "epoch": 3.571428571428571,
-      "grad_norm": 6.868558406829834,
-      "kl": 0.015789221710292622,
       "learning_rate": 1.4957614383675767e-07,
       "loss": 0.0,
-      "reward": 2.1339223235845566,
-      "reward_std": 0.8391035334207118,
-      "rewards/concensus_correctness_reward_func": 0.25056250020861626,
-      "rewards/consensus_reward_func": 0.3125,
       "rewards/cumulative_reward_2": 0.0,
-      "rewards/final_correctness_reward_func": 0.25,
-      "rewards/question_recreation_reward_func": 0.6830785924103111,
       "rewards/soft_format_reward_func": 0.0,
-      "rewards/strict_format_reward_func": 0.0,
-      "rewards/xmlcount_reward_func": 0.6377812400460243,
       "step": 14
     },
     {
-      "completion_length": 268.4166666666667,
-      "epoch": 4.0,
-      "grad_norm": 7.456064224243164,
-      "kl": 0.01451573691641291,
       "learning_rate": 8.067960709356478e-08,
       "loss": 0.0,
-      "reward": 1.7915068914492924,
-      "reward_std": 0.7824884320919713,
-      "rewards/concensus_correctness_reward_func": 0.24650000035762787,
-      "rewards/consensus_reward_func": 0.16666666666666666,
       "rewards/cumulative_reward_2": 0.0,
-      "rewards/final_correctness_reward_func": 0.0,
-      "rewards/question_recreation_reward_func": 0.6036318947250644,
       "rewards/soft_format_reward_func": 0.0,
-      "rewards/strict_format_reward_func": 0.0,
-      "rewards/xmlcount_reward_func": 0.7747083231806755,
       "step": 16
     },
     {
-      "completion_length": 391.1875,
-      "epoch": 4.571428571428571,
-      "grad_norm": 6.4319868087768555,
-      "kl": 0.014159469516016543,
       "learning_rate": 3.013156219837776e-08,
       "loss": 0.0,
-      "reward": 1.9930320084095001,
-      "reward_std": 1.1655664396821521,
-      "rewards/concensus_correctness_reward_func": 0.2486250028014183,
-      "rewards/consensus_reward_func": 0.375,
       "rewards/cumulative_reward_2": 0.0,
-      "rewards/final_correctness_reward_func": 0.0625,
-      "rewards/question_recreation_reward_func": 0.6031882213428617,
       "rewards/soft_format_reward_func": 0.0,
-      "rewards/strict_format_reward_func": 0.0,
-      "rewards/xmlcount_reward_func": 0.7037187442183495,
       "step": 18
     },
     {
-      "completion_length": 486.5416666666667,
-      "epoch": 5.0,
-      "grad_norm": 3.5053153038024902,
-      "kl": 0.02437695808475837,
       "learning_rate": 3.4096741493194193e-09,
       "loss": 0.0,
-      "reward": 2.5299047753214836,
-      "reward_std": 1.897360995799924,
-      "rewards/concensus_correctness_reward_func": 0.9155000001192093,
-      "rewards/consensus_reward_func": 0.0,
       "rewards/cumulative_reward_2": 0.0,
-      "rewards/final_correctness_reward_func": 0.25,
-      "rewards/question_recreation_reward_func": 0.6312380793193976,
       "rewards/soft_format_reward_func": 0.0,
-      "rewards/strict_format_reward_func": 0.0,
-      "rewards/xmlcount_reward_func": 0.733166666701436,
       "step": 20
     },
     {
-      "epoch": 5.0,
       "step": 20,
       "total_flos": 0.0,
-      "train_loss": 9.27983032852353e-06,
-      "train_runtime": 523.7046,
-      "train_samples_per_second": 0.611,
-      "train_steps_per_second": 0.038
     }
   ],
   "logging_steps": 2,
   "max_steps": 20,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 7,
   "save_steps": 25,
   "stateful_callbacks": {
     "TrainerControl": {
@@ -227,7 +227,7 @@
     }
   },
   "total_flos": 0.0,
-  "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null
 }

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.7111111111111112,
   "eval_steps": 500,
   "global_step": 20,
   "is_hyper_param_search": false,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "completion_length": 301.84375,
+      "epoch": 0.17777777777777778,
+      "grad_norm": 7.066680908203125,
       "kl": 0.0,
       "learning_rate": 5e-07,
       "loss": -0.0,
+      "reward": 3.1373209916055202,
+      "reward_std": 3.210153318941593,
+      "rewards/concensus_correctness_reward_func": 1.6008750051259995,
+      "rewards/consensus_reward_func": 0.1875,
       "rewards/cumulative_reward_2": 0.0,
+      "rewards/final_correctness_reward_func": 0.1875,
+      "rewards/question_recreation_reward_func": 0.602445937693119,
       "rewards/soft_format_reward_func": 0.0,
+      "rewards/strict_format_reward_func": 0.046875,
+      "rewards/xmlcount_reward_func": 0.5121249966323376,
       "step": 2
     },
     {
+      "completion_length": 319.78125,
+      "epoch": 0.35555555555555557,
+      "grad_norm": 0.634601891040802,
+      "kl": 0.0009354531721328385,
       "learning_rate": 4.864543104251586e-07,
       "loss": 0.0,
+      "reward": 2.114365443587303,
+      "reward_std": 1.4343808069825172,
+      "rewards/concensus_correctness_reward_func": 0.3385624997317791,
+      "rewards/consensus_reward_func": 0.4375,
       "rewards/cumulative_reward_2": 0.0,
+      "rewards/final_correctness_reward_func": 0.1875,
+      "rewards/question_recreation_reward_func": 0.4953029826283455,
       "rewards/soft_format_reward_func": 0.0,
+      "rewards/strict_format_reward_func": 0.0625,
+      "rewards/xmlcount_reward_func": 0.5929999966174364,
       "step": 4
     },
     {
+      "completion_length": 264.8125,
+      "epoch": 0.5333333333333333,
+      "grad_norm": 0.9841508865356445,
+      "kl": 0.0009024547471199185,
       "learning_rate": 4.472851273490984e-07,
       "loss": 0.0,
+      "reward": 1.362575352191925,
+      "reward_std": 0.9097995422780514,
+      "rewards/concensus_correctness_reward_func": 0.05443749949336052,
+      "rewards/consensus_reward_func": 0.0625,
       "rewards/cumulative_reward_2": 0.0,
+      "rewards/final_correctness_reward_func": 0.0,
+      "rewards/question_recreation_reward_func": 0.5367003623396158,
       "rewards/soft_format_reward_func": 0.0,
+      "rewards/strict_format_reward_func": 0.078125,
+      "rewards/xmlcount_reward_func": 0.6308125089854002,
       "step": 6
     },
     {
+      "completion_length": 316.21875,
+      "epoch": 0.7111111111111111,
+      "grad_norm": 1.0989762544631958,
+      "kl": 0.0010006891025113873,
       "learning_rate": 3.867370395306068e-07,
       "loss": 0.0,
+      "reward": 1.2260264866054058,
+      "reward_std": 1.203055463731289,
+      "rewards/concensus_correctness_reward_func": 0.09487500041723251,
+      "rewards/consensus_reward_func": 0.125,
       "rewards/cumulative_reward_2": 0.0,
+      "rewards/final_correctness_reward_func": 0.125,
+      "rewards/question_recreation_reward_func": 0.4443702418357134,
       "rewards/soft_format_reward_func": 0.0,
+      "rewards/strict_format_reward_func": 0.078125,
+      "rewards/xmlcount_reward_func": 0.35865625459700823,
       "step": 8
     },
     {
+      "completion_length": 297.3125,
+      "epoch": 0.8888888888888888,
+      "grad_norm": 0.7463904619216919,
+      "kl": 0.0009849788038991392,
       "learning_rate": 3.1137137178519977e-07,
       "loss": 0.0,
+      "reward": 2.639808088541031,
+      "reward_std": 1.6892759203910828,
+      "rewards/concensus_correctness_reward_func": 0.3762499988079071,
+      "rewards/consensus_reward_func": 0.3125,
       "rewards/cumulative_reward_2": 0.0,
+      "rewards/final_correctness_reward_func": 0.625,
+      "rewards/question_recreation_reward_func": 0.6109642945230007,
       "rewards/soft_format_reward_func": 0.0,
+      "rewards/strict_format_reward_func": 0.046875,
+      "rewards/xmlcount_reward_func": 0.6682187635451555,
       "step": 10
     },
     {
+      "completion_length": 270.25,
+      "epoch": 1.0,
+      "grad_norm": 0.28857168555259705,
+      "kl": 0.0010030706413090228,
       "learning_rate": 2.2935516363191693e-07,
       "loss": 0.0,
+      "reward": 1.6203292042016983,
+      "reward_std": 1.343556320667267,
+      "rewards/concensus_correctness_reward_func": 0.24619999527931213,
+      "rewards/consensus_reward_func": 0.2,
       "rewards/cumulative_reward_2": 0.0,
+      "rewards/final_correctness_reward_func": 0.1,
+      "rewards/question_recreation_reward_func": 0.5745792031288147,
       "rewards/soft_format_reward_func": 0.0,
+      "rewards/strict_format_reward_func": 0.05,
+      "rewards/xmlcount_reward_func": 0.44955002069473265,
       "step": 12
     },
     {
+      "completion_length": 224.9375,
+      "epoch": 1.1777777777777778,
+      "grad_norm": 0.64219069480896,
+      "kl": 0.0011269240640103817,
       "learning_rate": 1.4957614383675767e-07,
       "loss": 0.0,
+      "reward": 2.192922744899988,
+      "reward_std": 2.0775813162326813,
+      "rewards/concensus_correctness_reward_func": 0.8414374999701977,
+      "rewards/consensus_reward_func": 0.1875,
       "rewards/cumulative_reward_2": 0.0,
+      "rewards/final_correctness_reward_func": 0.125,
+      "rewards/question_recreation_reward_func": 0.49420391861349344,
       "rewards/soft_format_reward_func": 0.0,
+      "rewards/strict_format_reward_func": 0.0625,
+      "rewards/xmlcount_reward_func": 0.48228123784065247,
       "step": 14
     },
     {
+      "completion_length": 282.40625,
+      "epoch": 1.3555555555555556,
+      "grad_norm": 0.6175893545150757,
+      "kl": 0.0012615923915291205,
       "learning_rate": 8.067960709356478e-08,
       "loss": 0.0,
+      "reward": 2.493862770497799,
+      "reward_std": 2.422182433307171,
+      "rewards/concensus_correctness_reward_func": 0.775374997407198,
+      "rewards/consensus_reward_func": 0.375,
       "rewards/cumulative_reward_2": 0.0,
+      "rewards/final_correctness_reward_func": 0.1875,
+      "rewards/question_recreation_reward_func": 0.5001439936459064,
       "rewards/soft_format_reward_func": 0.0,
+      "rewards/strict_format_reward_func": 0.0625,
+      "rewards/xmlcount_reward_func": 0.5933437421917915,
       "step": 16
     },
     {
+      "completion_length": 304.375,
+      "epoch": 1.5333333333333332,
+      "grad_norm": 0.7023181319236755,
+      "kl": 0.0010216495575150475,
       "learning_rate": 3.013156219837776e-08,
       "loss": 0.0,
+      "reward": 1.5151998028159142,
+      "reward_std": 1.1604772619903088,
+      "rewards/concensus_correctness_reward_func": 0.14925000071525574,
+      "rewards/consensus_reward_func": 0.1875,
       "rewards/cumulative_reward_2": 0.0,
+      "rewards/final_correctness_reward_func": 0.0,
+      "rewards/question_recreation_reward_func": 0.5289810225367546,
       "rewards/soft_format_reward_func": 0.0,
+      "rewards/strict_format_reward_func": 0.109375,
+      "rewards/xmlcount_reward_func": 0.5400937534868717,
       "step": 18
     },
     {
+      "completion_length": 304.0,
+      "epoch": 1.7111111111111112,
+      "grad_norm": 0.9491791129112244,
+      "kl": 0.0010110394432558678,
       "learning_rate": 3.4096741493194193e-09,
       "loss": 0.0,
+      "reward": 2.6542146876454353,
+      "reward_std": 2.6235328931361437,
+      "rewards/concensus_correctness_reward_func": 1.021937470883131,
+      "rewards/consensus_reward_func": 0.4375,
       "rewards/cumulative_reward_2": 0.0,
+      "rewards/final_correctness_reward_func": 0.125,
+      "rewards/question_recreation_reward_func": 0.4642459051683545,
       "rewards/soft_format_reward_func": 0.0,
+      "rewards/strict_format_reward_func": 0.046875,
+      "rewards/xmlcount_reward_func": 0.5586562408134341,
       "step": 20
     },
     {
+      "epoch": 1.7111111111111112,
       "step": 20,
       "total_flos": 0.0,
+      "train_loss": 8.800688064525275e-07,
+      "train_runtime": 1536.4028,
+      "train_samples_per_second": 0.208,
+      "train_steps_per_second": 0.013
     }
   ],
   "logging_steps": 2,
   "max_steps": 20,
   "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
   "save_steps": 25,
   "stateful_callbacks": {
     "TrainerControl": {
     }
   },
   "total_flos": 0.0,
+  "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:98c6f059c89d93eefbe71d966ea8785c5ee6165141aeea352216f427430a681c
-size 5880

 version https://git-lfs.github.com/spec/v1
+oid sha256:96fa84a84bc6a9e947847c2b0388fc25f377f8c4c56457e129a4434c84c01b25
+size 5944