Tapendra
/

gemma-3-4b-it_checkpoint_v7

PEFT

Safetensors

Model card Files and versions Community

Tapendra committed 28 days ago

Commit

6339bdb

verified ·

1 Parent(s): 0cf055e

Upload trainer_state.json with huggingface_hub

Browse files

Files changed (1) hide show

trainer_state.json +160 -0

trainer_state.json ADDED Viewed

	@@ -0,0 +1,160 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 15.0,
+  "eval_steps": 500,
+  "global_step": 18390,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.8156606851549756,
+      "grad_norm": 2.7835583686828613,
+      "learning_rate": 0.00019968889417401253,
+      "loss": 0.6438,
+      "step": 1000
+    },
+    {
+      "epoch": 1.631321370309951,
+      "grad_norm": 0.3261709213256836,
+      "learning_rate": 0.00019676585418772425,
+      "loss": 0.2388,
+      "step": 2000
+    },
+    {
+      "epoch": 2.4469820554649266,
+      "grad_norm": 2.9635205268859863,
+      "learning_rate": 0.00019084912501825553,
+      "loss": 0.2032,
+      "step": 3000
+    },
+    {
+      "epoch": 3.262642740619902,
+      "grad_norm": 2.162414312362671,
+      "learning_rate": 0.00018212175520336934,
+      "loss": 0.163,
+      "step": 4000
+    },
+    {
+      "epoch": 4.078303425774878,
+      "grad_norm": 3.4721012115478516,
+      "learning_rate": 0.00017085374734710157,
+      "loss": 0.1519,
+      "step": 5000
+    },
+    {
+      "epoch": 4.893964110929853,
+      "grad_norm": 1.8501887321472168,
+      "learning_rate": 0.0001573937049265616,
+      "loss": 0.13,
+      "step": 6000
+    },
+    {
+      "epoch": 5.709624796084829,
+      "grad_norm": 3.3048102855682373,
+      "learning_rate": 0.00014215804738782126,
+      "loss": 0.1186,
+      "step": 7000
+    },
+    {
+      "epoch": 6.525285481239804,
+      "grad_norm": 2.223738193511963,
+      "learning_rate": 0.00012561812718836913,
+      "loss": 0.1046,
+      "step": 8000
+    },
+    {
+      "epoch": 7.3409461663947795,
+      "grad_norm": 0.2893391251564026,
+      "learning_rate": 0.00010828564735203954,
+      "loss": 0.0957,
+      "step": 9000
+    },
+    {
+      "epoch": 8.156606851549755,
+      "grad_norm": 0.24986231327056885,
+      "learning_rate": 9.069683068014265e-05,
+      "loss": 0.0912,
+      "step": 10000
+    },
+    {
+      "epoch": 8.97226753670473,
+      "grad_norm": 0.6728571653366089,
+      "learning_rate": 7.339583038310173e-05,
+      "loss": 0.08,
+      "step": 11000
+    },
+    {
+      "epoch": 9.787928221859707,
+      "grad_norm": 0.3318624794483185,
+      "learning_rate": 5.69178953654216e-05,
+      "loss": 0.0722,
+      "step": 12000
+    },
+    {
+      "epoch": 10.603588907014682,
+      "grad_norm": 0.1652187556028366,
+      "learning_rate": 4.177281098721372e-05,
+      "loss": 0.07,
+      "step": 13000
+    },
+    {
+      "epoch": 11.419249592169658,
+      "grad_norm": 0.13997943699359894,
+      "learning_rate": 2.8429127602959905e-05,
+      "loss": 0.0635,
+      "step": 14000
+    },
+    {
+      "epoch": 12.234910277324634,
+      "grad_norm": 0.17078348994255066,
+      "learning_rate": 1.729966480637476e-05,
+      "loss": 0.0607,
+      "step": 15000
+    },
+    {
+      "epoch": 13.05057096247961,
+      "grad_norm": 0.10830472409725189,
+      "learning_rate": 8.728739843127509e-06,
+      "loss": 0.0599,
+      "step": 16000
+    },
+    {
+      "epoch": 13.866231647634583,
+      "grad_norm": 0.13888326287269592,
+      "learning_rate": 2.9815153118197825e-06,
+      "loss": 0.0566,
+      "step": 17000
+    },
+    {
+      "epoch": 14.681892332789559,
+      "grad_norm": 0.138445645570755,
+      "learning_rate": 2.3579570823278885e-07,
+      "loss": 0.0556,
+      "step": 18000
+    }
+  ],
+  "logging_steps": 1000,
+  "max_steps": 18390,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 15,
+  "save_steps": 6000,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 7.5627004461372e+16,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}