End of training

Browse files

Files changed (7) hide show

README.md +4 -3
adapter_config.json +39 -0
adapter_model.safetensors +3 -0
all_results.json +5 -5
train_results.json +5 -5
trainer_state.json +123 -123
training_args.bin +2 -2

README.md CHANGED Viewed

@@ -1,5 +1,5 @@
 ---
-base_model: Gensyn/Qwen2.5-0.5B-Instruct
 library_name: transformers
 model_name: gensyn-checkpoints-diving_winged_aardvark
 tags:
@@ -8,13 +8,14 @@ tags:
 - grpo
 - gensyn
 - I am diving winged aardvark
 - trl
 licence: license
 ---
 # Model Card for gensyn-checkpoints-diving_winged_aardvark
-This model is a fine-tuned version of [Gensyn/Qwen2.5-0.5B-Instruct](https://huggingface.co/Gensyn/Qwen2.5-0.5B-Instruct).
 It has been trained using [TRL](https://github.com/huggingface/trl).
 ## Quick start
@@ -39,7 +40,7 @@ This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing
 - TRL: 0.15.2
 - Transformers: 4.51.3
-- Pytorch: 2.5.1
 - Datasets: 3.5.0
 - Tokenizers: 0.21.1

 ---
+base_model: Gensyn/Qwen2.5-1.5B-Instruct
 library_name: transformers
 model_name: gensyn-checkpoints-diving_winged_aardvark
 tags:
 - grpo
 - gensyn
 - I am diving winged aardvark
+- unsloth
 - trl
 licence: license
 ---
 # Model Card for gensyn-checkpoints-diving_winged_aardvark
+This model is a fine-tuned version of [Gensyn/Qwen2.5-1.5B-Instruct](https://huggingface.co/Gensyn/Qwen2.5-1.5B-Instruct).
 It has been trained using [TRL](https://github.com/huggingface/trl).
 ## Quick start
 - TRL: 0.15.2
 - Transformers: 4.51.3
+- Pytorch: 2.6.0
 - Datasets: 3.5.0
 - Tokenizers: 0.21.1

adapter_config.json ADDED Viewed

	@@ -0,0 +1,39 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "Gensyn/Qwen2.5-1.5B-Instruct",
+  "bias": "none",
+  "corda_config": null,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_bias": false,
+  "lora_dropout": 0,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "up_proj",
+    "v_proj",
+    "down_proj",
+    "o_proj",
+    "q_proj",
+    "gate_proj",
+    "k_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_rslora": false
+}

adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:96af1fd8a8cb21f833b2bd1b1565b3f9396174638481c4259fe3f615a3c4bcd7
+size 73911112

all_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
     "total_flos": 0.0,
-    "train_loss": 3.66260876035085e-05,
-    "train_runtime": 246.0607,
-    "train_samples": 28,
-    "train_samples_per_second": 1.3,
-    "train_steps_per_second": 0.081
 }

 {
     "total_flos": 0.0,
+    "train_loss": 6.941893218481709e-07,
+    "train_runtime": 1892.3638,
+    "train_samples": 11,
+    "train_samples_per_second": 0.169,
+    "train_steps_per_second": 0.011
 }

train_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
     "total_flos": 0.0,
-    "train_loss": 3.66260876035085e-05,
-    "train_runtime": 246.0607,
-    "train_samples": 28,
-    "train_samples_per_second": 1.3,
-    "train_steps_per_second": 0.081
 }

 {
     "total_flos": 0.0,
+    "train_loss": 6.941893218481709e-07,
+    "train_runtime": 1892.3638,
+    "train_samples": 11,
+    "train_samples_per_second": 0.169,
+    "train_steps_per_second": 0.011
 }

trainer_state.json CHANGED Viewed

@@ -2,7 +2,7 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 5.0,
   "eval_steps": 500,
   "global_step": 20,
   "is_hyper_param_search": false,
@@ -10,209 +10,209 @@
   "is_world_process_zero": true,
   "log_history": [
     {
-      "completion_length": 342.96875,
-      "epoch": 0.5714285714285714,
-      "grad_norm": 4.636115550994873,
-      "kl": 0.0,
       "learning_rate": 5e-07,
       "loss": 0.0,
-      "reward": 2.160812631249428,
-      "reward_std": 0.6587000478175469,
-      "rewards/concensus_correctness_reward_func": 0.4399375021457672,
-      "rewards/consensus_reward_func": 0.625,
       "rewards/cumulative_reward_2": 0.0,
-      "rewards/final_correctness_reward_func": 0.125,
-      "rewards/question_recreation_reward_func": 0.5692501617595553,
       "rewards/soft_format_reward_func": 0.0,
-      "rewards/strict_format_reward_func": 0.046875,
-      "rewards/xmlcount_reward_func": 0.35475000459700823,
       "step": 2
     },
     {
-      "completion_length": 475.5416666666667,
-      "epoch": 1.0,
-      "grad_norm": 2.719712257385254,
-      "kl": 0.0018013427131033193,
       "learning_rate": 4.864543104251586e-07,
       "loss": 0.0,
-      "reward": 3.882448842128118,
-      "reward_std": 2.6914940675099692,
-      "rewards/concensus_correctness_reward_func": 1.1753333335121472,
-      "rewards/consensus_reward_func": 0.4166666666666667,
       "rewards/cumulative_reward_2": 0.0,
-      "rewards/final_correctness_reward_func": 0.75,
-      "rewards/question_recreation_reward_func": 0.6292404855291048,
       "rewards/soft_format_reward_func": 0.0,
-      "rewards/strict_format_reward_func": 0.0,
-      "rewards/xmlcount_reward_func": 0.9112083464860916,
       "step": 4
     },
     {
-      "completion_length": 292.09375,
-      "epoch": 1.5714285714285714,
-      "grad_norm": 8.258121490478516,
-      "kl": 0.029607473421492614,
       "learning_rate": 4.472851273490984e-07,
       "loss": 0.0,
-      "reward": 2.336483985185623,
-      "reward_std": 0.8101844638586044,
-      "rewards/concensus_correctness_reward_func": 0.14556249976158142,
-      "rewards/consensus_reward_func": 0.0625,
       "rewards/cumulative_reward_2": 0.0,
-      "rewards/final_correctness_reward_func": 0.625,
-      "rewards/question_recreation_reward_func": 0.5946714862948284,
       "rewards/soft_format_reward_func": 0.0,
-      "rewards/strict_format_reward_func": 0.0,
-      "rewards/xmlcount_reward_func": 0.9087500139139593,
       "step": 6
     },
     {
-      "completion_length": 367.3333333333333,
-      "epoch": 2.0,
-      "grad_norm": 2.8354005813598633,
-      "kl": 0.031232848976893973,
       "learning_rate": 3.867370395306068e-07,
       "loss": 0.0,
-      "reward": 3.037147363026937,
-      "reward_std": 1.0202810720851023,
-      "rewards/concensus_correctness_reward_func": 0.3407499964038531,
-      "rewards/consensus_reward_func": 0.6666666666666666,
       "rewards/cumulative_reward_2": 0.0,
-      "rewards/final_correctness_reward_func": 0.3333333333333333,
-      "rewards/question_recreation_reward_func": 0.7943973168730736,
       "rewards/soft_format_reward_func": 0.0,
       "rewards/strict_format_reward_func": 0.0,
-      "rewards/xmlcount_reward_func": 0.9020000025629997,
       "step": 8
     },
     {
-      "completion_length": 243.59375,
-      "epoch": 2.571428571428571,
-      "grad_norm": 9.608664512634277,
-      "kl": 0.02638791529170703,
       "learning_rate": 3.1137137178519977e-07,
       "loss": 0.0,
-      "reward": 2.3775548972189426,
-      "reward_std": 1.3022976123647823,
-      "rewards/concensus_correctness_reward_func": 0.3585625011473894,
-      "rewards/consensus_reward_func": 0.3125,
       "rewards/cumulative_reward_2": 0.0,
-      "rewards/final_correctness_reward_func": 0.1875,
-      "rewards/question_recreation_reward_func": 0.5948673833627254,
       "rewards/soft_format_reward_func": 0.0,
       "rewards/strict_format_reward_func": 0.0,
-      "rewards/xmlcount_reward_func": 0.924125000834465,
       "step": 10
     },
     {
-      "completion_length": 302.2083333333333,
-      "epoch": 3.0,
-      "grad_norm": 5.144018650054932,
-      "kl": 0.01467265534059455,
       "learning_rate": 2.2935516363191693e-07,
       "loss": 0.0,
-      "reward": 3.758754551410675,
-      "reward_std": 1.0187010215595365,
-      "rewards/concensus_correctness_reward_func": 0.6190833350022634,
-      "rewards/consensus_reward_func": 0.6666666666666666,
       "rewards/cumulative_reward_2": 0.0,
-      "rewards/final_correctness_reward_func": 0.8333333333333334,
-      "rewards/question_recreation_reward_func": 0.7559212238217393,
       "rewards/soft_format_reward_func": 0.0,
       "rewards/strict_format_reward_func": 0.0,
-      "rewards/xmlcount_reward_func": 0.8837500164906184,
       "step": 12
     },
     {
-      "completion_length": 310.5625,
-      "epoch": 3.571428571428571,
-      "grad_norm": 9.093050956726074,
-      "kl": 0.024251463531982154,
       "learning_rate": 1.4957614383675767e-07,
       "loss": 0.0,
-      "reward": 3.3967253491282463,
-      "reward_std": 0.9880957853747532,
-      "rewards/concensus_correctness_reward_func": 0.5252499990165234,
-      "rewards/consensus_reward_func": 0.4375,
       "rewards/cumulative_reward_2": 0.0,
-      "rewards/final_correctness_reward_func": 0.75,
-      "rewards/question_recreation_reward_func": 0.7096316055394709,
       "rewards/soft_format_reward_func": 0.0,
       "rewards/strict_format_reward_func": 0.0,
-      "rewards/xmlcount_reward_func": 0.9743437394499779,
       "step": 14
     },
     {
-      "completion_length": 288.0416666666667,
-      "epoch": 4.0,
-      "grad_norm": 4.826526641845703,
-      "kl": 0.05147159545837591,
       "learning_rate": 8.067960709356478e-08,
       "loss": 0.0,
-      "reward": 2.3825169702370963,
-      "reward_std": 0.9278464317370284,
-      "rewards/concensus_correctness_reward_func": 0.3700833370288213,
-      "rewards/consensus_reward_func": 0.3333333333333333,
       "rewards/cumulative_reward_2": 0.0,
-      "rewards/final_correctness_reward_func": 0.25,
-      "rewards/question_recreation_reward_func": 0.6023503045241038,
       "rewards/soft_format_reward_func": 0.0,
       "rewards/strict_format_reward_func": 0.0,
-      "rewards/xmlcount_reward_func": 0.8267500102519989,
       "step": 16
     },
     {
-      "completion_length": 334.5,
-      "epoch": 4.571428571428571,
-      "grad_norm": 6.352971076965332,
-      "kl": 0.2050690782489255,
       "learning_rate": 3.013156219837776e-08,
-      "loss": 0.0002,
-      "reward": 3.8376564756035805,
-      "reward_std": 2.506588280783035,
-      "rewards/concensus_correctness_reward_func": 1.0865624994039536,
-      "rewards/consensus_reward_func": 0.4375,
       "rewards/cumulative_reward_2": 0.0,
-      "rewards/final_correctness_reward_func": 0.75,
-      "rewards/question_recreation_reward_func": 0.6192189978901297,
       "rewards/soft_format_reward_func": 0.0,
       "rewards/strict_format_reward_func": 0.0,
-      "rewards/xmlcount_reward_func": 0.9443750027567148,
       "step": 18
     },
     {
-      "completion_length": 258.7083333333333,
-      "epoch": 5.0,
-      "grad_norm": 3.8929505348205566,
-      "kl": 0.03378994130374243,
       "learning_rate": 3.4096741493194193e-09,
       "loss": 0.0,
-      "reward": 2.443352550268173,
-      "reward_std": 0.7956977238257726,
-      "rewards/concensus_correctness_reward_func": 0.3044166713953018,
-      "rewards/consensus_reward_func": 0.25,
       "rewards/cumulative_reward_2": 0.0,
-      "rewards/final_correctness_reward_func": 0.3333333333333333,
-      "rewards/question_recreation_reward_func": 0.5871858916555842,
-      "rewards/soft_format_reward_func": 0.0,
       "rewards/strict_format_reward_func": 0.0,
-      "rewards/xmlcount_reward_func": 0.9684166759252548,
       "step": 20
     },
     {
-      "epoch": 5.0,
       "step": 20,
       "total_flos": 0.0,
-      "train_loss": 3.66260876035085e-05,
-      "train_runtime": 246.0607,
-      "train_samples_per_second": 1.3,
-      "train_steps_per_second": 0.081
     }
   ],
   "logging_steps": 2,
   "max_steps": 20,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 7,
   "save_steps": 25,
   "stateful_callbacks": {
     "TrainerControl": {
@@ -227,7 +227,7 @@
     }
   },
   "total_flos": 0.0,
-  "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null
 }

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 6.7272727272727275,
   "eval_steps": 500,
   "global_step": 20,
   "is_hyper_param_search": false,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "completion_length": 434.71875,
+      "epoch": 0.7272727272727273,
+      "grad_norm": 0.37755826115608215,
+      "kl": 0.0007222239291877486,
       "learning_rate": 5e-07,
       "loss": 0.0,
+      "reward": 0.6773073673248291,
+      "reward_std": 0.7603964880108833,
+      "rewards/concensus_correctness_reward_func": 0.05237499997019768,
+      "rewards/consensus_reward_func": 0.125,
       "rewards/cumulative_reward_2": 0.0,
+      "rewards/final_correctness_reward_func": 0.0,
+      "rewards/question_recreation_reward_func": 0.3419636320322752,
       "rewards/soft_format_reward_func": 0.0,
+      "rewards/strict_format_reward_func": 0.015625,
+      "rewards/xmlcount_reward_func": 0.14234375208616257,
       "step": 2
     },
     {
+      "completion_length": 503.35714285714283,
+      "epoch": 1.3636363636363638,
+      "grad_norm": 0.3544762134552002,
+      "kl": 0.0006736505310982466,
       "learning_rate": 4.864543104251586e-07,
       "loss": 0.0,
+      "reward": 1.0537827483245306,
+      "reward_std": 0.8825796374252864,
+      "rewards/concensus_correctness_reward_func": 0.03142857125827244,
+      "rewards/consensus_reward_func": 0.21428571428571427,
       "rewards/cumulative_reward_2": 0.0,
+      "rewards/final_correctness_reward_func": 0.0,
+      "rewards/question_recreation_reward_func": 0.34246131192360607,
       "rewards/soft_format_reward_func": 0.0,
+      "rewards/strict_format_reward_func": 0.017857142857142856,
+      "rewards/xmlcount_reward_func": 0.44774999575955526,
       "step": 4
     },
     {
+      "completion_length": 298.17857142857144,
+      "epoch": 2.0,
+      "grad_norm": 0.5695165991783142,
+      "kl": 0.000864676565730146,
       "learning_rate": 4.472851273490984e-07,
       "loss": 0.0,
+      "reward": 1.7759888512747628,
+      "reward_std": 1.3623838126659393,
+      "rewards/concensus_correctness_reward_func": 0.12150000035762787,
+      "rewards/consensus_reward_func": 0.5714285714285714,
       "rewards/cumulative_reward_2": 0.0,
+      "rewards/final_correctness_reward_func": 0.0,
+      "rewards/question_recreation_reward_func": 0.32202458488089697,
       "rewards/soft_format_reward_func": 0.0,
+      "rewards/strict_format_reward_func": 0.07142857142857142,
+      "rewards/xmlcount_reward_func": 0.6896071561745235,
       "step": 6
     },
     {
+      "completion_length": 456.65625,
+      "epoch": 2.7272727272727275,
+      "grad_norm": 0.4480558931827545,
+      "kl": 0.0006415472271328326,
       "learning_rate": 3.867370395306068e-07,
       "loss": 0.0,
+      "reward": 0.7656630929559469,
+      "reward_std": 0.9386032819747925,
+      "rewards/concensus_correctness_reward_func": 0.02356250025331974,
+      "rewards/consensus_reward_func": 0.125,
       "rewards/cumulative_reward_2": 0.0,
+      "rewards/final_correctness_reward_func": 0.0,
+      "rewards/question_recreation_reward_func": 0.39931935630738735,
       "rewards/soft_format_reward_func": 0.0,
       "rewards/strict_format_reward_func": 0.0,
+      "rewards/xmlcount_reward_func": 0.21778127364814281,
       "step": 8
     },
     {
+      "completion_length": 410.92857142857144,
+      "epoch": 3.3636363636363638,
+      "grad_norm": 0.3481834828853607,
+      "kl": 0.0007305198731566114,
       "learning_rate": 3.1137137178519977e-07,
       "loss": 0.0,
+      "reward": 0.874864114182336,
+      "reward_std": 1.0430768387658256,
+      "rewards/concensus_correctness_reward_func": 0.04728571431977408,
+      "rewards/consensus_reward_func": 0.35714285714285715,
       "rewards/cumulative_reward_2": 0.0,
+      "rewards/final_correctness_reward_func": 0.0,
+      "rewards/question_recreation_reward_func": 0.23861412597554071,
       "rewards/soft_format_reward_func": 0.0,
       "rewards/strict_format_reward_func": 0.0,
+      "rewards/xmlcount_reward_func": 0.23182143058095658,
       "step": 10
     },
     {
+      "completion_length": 368.7857142857143,
+      "epoch": 4.0,
+      "grad_norm": 0.5070796608924866,
+      "kl": 0.0008041880043622639,
       "learning_rate": 2.2935516363191693e-07,
       "loss": 0.0,
+      "reward": 0.8887122677905219,
+      "reward_std": 0.53629674097257,
+      "rewards/concensus_correctness_reward_func": 0.04728571431977408,
+      "rewards/consensus_reward_func": 0.14285714285714285,
       "rewards/cumulative_reward_2": 0.0,
+      "rewards/final_correctness_reward_func": 0.0,
+      "rewards/question_recreation_reward_func": 0.3834265449217388,
       "rewards/soft_format_reward_func": 0.0,
       "rewards/strict_format_reward_func": 0.0,
+      "rewards/xmlcount_reward_func": 0.3151428465332304,
       "step": 12
     },
     {
+      "completion_length": 446.84375,
+      "epoch": 4.7272727272727275,
+      "grad_norm": 0.5358139872550964,
+      "kl": 0.0007380423085123766,
       "learning_rate": 1.4957614383675767e-07,
       "loss": 0.0,
+      "reward": 0.7791000567376614,
+      "reward_std": 0.9451805762946606,
+      "rewards/concensus_correctness_reward_func": 0.08275000005960464,
+      "rewards/consensus_reward_func": 0.1875,
       "rewards/cumulative_reward_2": 0.0,
+      "rewards/final_correctness_reward_func": 0.0,
+      "rewards/question_recreation_reward_func": 0.30969385243952274,
       "rewards/soft_format_reward_func": 0.0,
       "rewards/strict_format_reward_func": 0.0,
+      "rewards/xmlcount_reward_func": 0.1991562508046627,
       "step": 14
     },
     {
+      "completion_length": 375.39285714285717,
+      "epoch": 5.363636363636363,
+      "grad_norm": 0.43942785263061523,
+      "kl": 0.0008642979165805238,
       "learning_rate": 8.067960709356478e-08,
       "loss": 0.0,
+      "reward": 0.951131990977696,
+      "reward_std": 0.666512497833797,
+      "rewards/concensus_correctness_reward_func": 0.06571428690637861,
+      "rewards/consensus_reward_func": 0.21428571428571427,
       "rewards/cumulative_reward_2": 0.0,
+      "rewards/final_correctness_reward_func": 0.0,
+      "rewards/question_recreation_reward_func": 0.4337748225246157,
       "rewards/soft_format_reward_func": 0.0,
       "rewards/strict_format_reward_func": 0.0,
+      "rewards/xmlcount_reward_func": 0.2373571459736143,
       "step": 16
     },
     {
+      "completion_length": 419.5357142857143,
+      "epoch": 6.0,
+      "grad_norm": 0.6359270811080933,
+      "kl": 0.0007697979703412525,
       "learning_rate": 3.013156219837776e-08,
+      "loss": 0.0,
+      "reward": 0.6178087230239596,
+      "reward_std": 0.7275546810456684,
+      "rewards/concensus_correctness_reward_func": 0.0,
+      "rewards/consensus_reward_func": 0.14285714285714285,
       "rewards/cumulative_reward_2": 0.0,
+      "rewards/final_correctness_reward_func": 0.0,
+      "rewards/question_recreation_reward_func": 0.20834443398884364,
       "rewards/soft_format_reward_func": 0.0,
       "rewards/strict_format_reward_func": 0.0,
+      "rewards/xmlcount_reward_func": 0.266607152564185,
       "step": 18
     },
     {
+      "completion_length": 505.1875,
+      "epoch": 6.7272727272727275,
+      "grad_norm": 0.4273310899734497,
+      "kl": 0.0006926624046172947,
       "learning_rate": 3.4096741493194193e-09,
       "loss": 0.0,
+      "reward": 0.6878235023468733,
+      "reward_std": 0.9300711061805487,
+      "rewards/concensus_correctness_reward_func": 0.0,
+      "rewards/consensus_reward_func": 0.125,
       "rewards/cumulative_reward_2": 0.0,
+      "rewards/final_correctness_reward_func": 0.0,
+      "rewards/question_recreation_reward_func": 0.34247972909361124,
+      "rewards/soft_format_reward_func": 0.015625,
       "rewards/strict_format_reward_func": 0.0,
+      "rewards/xmlcount_reward_func": 0.20471875369548798,
       "step": 20
     },
     {
+      "epoch": 6.7272727272727275,
       "step": 20,
       "total_flos": 0.0,
+      "train_loss": 6.941893218481709e-07,
+      "train_runtime": 1892.3638,
+      "train_samples_per_second": 0.169,
+      "train_steps_per_second": 0.011
     }
   ],
   "logging_steps": 2,
   "max_steps": 20,
   "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
   "save_steps": 25,
   "stateful_callbacks": {
     "TrainerControl": {
     }
   },
   "total_flos": 0.0,
+  "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:70a83e52943e7572c83b9db4288f8284059ee6b1f50f817e226789c8671d6329
-size 5880

 version https://git-lfs.github.com/spec/v1
+oid sha256:b8759273f59794319e15abbe003b1f375ddaf93d4d09e8e15855128cb69a7e14
+size 5944