End of training

Browse files

Files changed (5) hide show

README.md +4 -1
all_results.json +12 -0
eval_results.json +7 -0
train_results.json +8 -0
trainer_state.json +365 -0

README.md CHANGED Viewed

@@ -4,6 +4,7 @@ license: llama2
 base_model: meta-llama/CodeLlama-7b-Instruct-hf
 tags:
 - llama-factory
 - generated_from_trainer
 model-index:
 - name: text-to-odrl-codellama.7b-v0
@@ -15,7 +16,9 @@ should probably proofread and complete it, then remove this comment. -->
 # text-to-odrl-codellama.7b-v0
-This model is a fine-tuned version of [meta-llama/CodeLlama-7b-Instruct-hf](https://huggingface.co/meta-llama/CodeLlama-7b-Instruct-hf) on an unknown dataset.
 ## Model description

 base_model: meta-llama/CodeLlama-7b-Instruct-hf
 tags:
 - llama-factory
+- lora
 - generated_from_trainer
 model-index:
 - name: text-to-odrl-codellama.7b-v0
 # text-to-odrl-codellama.7b-v0
+This model is a fine-tuned version of [meta-llama/CodeLlama-7b-Instruct-hf](https://huggingface.co/meta-llama/CodeLlama-7b-Instruct-hf) on the text_to_odrl_train dataset.
+It achieves the following results on the evaluation set:
+- Loss: 0.0923
 ## Model description

all_results.json ADDED Viewed

	@@ -0,0 +1,12 @@

+{
+    "epoch": 2.0,
+    "eval_loss": 0.09225524216890335,
+    "eval_runtime": 945.9721,
+    "eval_samples_per_second": 3.923,
+    "eval_steps_per_second": 1.962,
+    "total_flos": 2.4087255599087616e+17,
+    "train_loss": 0.11968068110531774,
+    "train_runtime": 7304.8453,
+    "train_samples_per_second": 1.016,
+    "train_steps_per_second": 0.064
+}

eval_results.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+    "epoch": 2.0,
+    "eval_loss": 0.09225524216890335,
+    "eval_runtime": 945.9721,
+    "eval_samples_per_second": 3.923,
+    "eval_steps_per_second": 1.962
+}

train_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 2.0,
+    "total_flos": 2.4087255599087616e+17,
+    "train_loss": 0.11968068110531774,
+    "train_runtime": 7304.8453,
+    "train_samples_per_second": 1.016,
+    "train_steps_per_second": 0.064
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,365 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 464,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.04310344827586207,
+      "grad_norm": 0.22970597445964813,
+      "learning_rate": 3.2142857142857144e-05,
+      "loss": 0.5477,
+      "step": 10
+    },
+    {
+      "epoch": 0.08620689655172414,
+      "grad_norm": 0.24922381341457367,
+      "learning_rate": 4.99847706754774e-05,
+      "loss": 0.4192,
+      "step": 20
+    },
+    {
+      "epoch": 0.12931034482758622,
+      "grad_norm": 0.1861911565065384,
+      "learning_rate": 4.9863047384206835e-05,
+      "loss": 0.2814,
+      "step": 30
+    },
+    {
+      "epoch": 0.1724137931034483,
+      "grad_norm": 0.16707484424114227,
+      "learning_rate": 4.962019382530521e-05,
+      "loss": 0.2082,
+      "step": 40
+    },
+    {
+      "epoch": 0.21551724137931033,
+      "grad_norm": 0.17490606009960175,
+      "learning_rate": 4.925739315689991e-05,
+      "loss": 0.1599,
+      "step": 50
+    },
+    {
+      "epoch": 0.25862068965517243,
+      "grad_norm": 0.16346780955791473,
+      "learning_rate": 4.877641290737884e-05,
+      "loss": 0.144,
+      "step": 60
+    },
+    {
+      "epoch": 0.3017241379310345,
+      "grad_norm": 0.16197219491004944,
+      "learning_rate": 4.817959636416969e-05,
+      "loss": 0.123,
+      "step": 70
+    },
+    {
+      "epoch": 0.3448275862068966,
+      "grad_norm": 0.1676415354013443,
+      "learning_rate": 4.7469851157479177e-05,
+      "loss": 0.1168,
+      "step": 80
+    },
+    {
+      "epoch": 0.3879310344827586,
+      "grad_norm": 0.14542680978775024,
+      "learning_rate": 4.665063509461097e-05,
+      "loss": 0.1112,
+      "step": 90
+    },
+    {
+      "epoch": 0.43103448275862066,
+      "grad_norm": 0.1882169097661972,
+      "learning_rate": 4.572593931387604e-05,
+      "loss": 0.1005,
+      "step": 100
+    },
+    {
+      "epoch": 0.47413793103448276,
+      "grad_norm": 0.14488768577575684,
+      "learning_rate": 4.4700268840168045e-05,
+      "loss": 0.102,
+      "step": 110
+    },
+    {
+      "epoch": 0.5172413793103449,
+      "grad_norm": 0.15230809152126312,
+      "learning_rate": 4.357862063693486e-05,
+      "loss": 0.103,
+      "step": 120
+    },
+    {
+      "epoch": 0.5603448275862069,
+      "grad_norm": 0.13396801054477692,
+      "learning_rate": 4.2366459261474933e-05,
+      "loss": 0.1035,
+      "step": 130
+    },
+    {
+      "epoch": 0.603448275862069,
+      "grad_norm": 0.1350702941417694,
+      "learning_rate": 4.1069690242163484e-05,
+      "loss": 0.0995,
+      "step": 140
+    },
+    {
+      "epoch": 0.646551724137931,
+      "grad_norm": 0.1560034453868866,
+      "learning_rate": 3.969463130731183e-05,
+      "loss": 0.0979,
+      "step": 150
+    },
+    {
+      "epoch": 0.6896551724137931,
+      "grad_norm": 0.13901282846927643,
+      "learning_rate": 3.824798160583012e-05,
+      "loss": 0.0947,
+      "step": 160
+    },
+    {
+      "epoch": 0.7327586206896551,
+      "grad_norm": 0.18362534046173096,
+      "learning_rate": 3.673678906964727e-05,
+      "loss": 0.0969,
+      "step": 170
+    },
+    {
+      "epoch": 0.7758620689655172,
+      "grad_norm": 0.17652247846126556,
+      "learning_rate": 3.516841607689501e-05,
+      "loss": 0.0905,
+      "step": 180
+    },
+    {
+      "epoch": 0.8189655172413793,
+      "grad_norm": 0.1358911544084549,
+      "learning_rate": 3.355050358314172e-05,
+      "loss": 0.0925,
+      "step": 190
+    },
+    {
+      "epoch": 0.8620689655172413,
+      "grad_norm": 0.1052212044596672,
+      "learning_rate": 3.1890933895424976e-05,
+      "loss": 0.0953,
+      "step": 200
+    },
+    {
+      "epoch": 0.9051724137931034,
+      "grad_norm": 0.13334128260612488,
+      "learning_rate": 3.0197792270443982e-05,
+      "loss": 0.0886,
+      "step": 210
+    },
+    {
+      "epoch": 0.9482758620689655,
+      "grad_norm": 0.12180697172880173,
+      "learning_rate": 2.8479327524001636e-05,
+      "loss": 0.0914,
+      "step": 220
+    },
+    {
+      "epoch": 0.9913793103448276,
+      "grad_norm": 0.1343410611152649,
+      "learning_rate": 2.674391184360313e-05,
+      "loss": 0.0887,
+      "step": 230
+    },
+    {
+      "epoch": 1.0344827586206897,
+      "grad_norm": 0.13129718601703644,
+      "learning_rate": 2.5e-05,
+      "loss": 0.0993,
+      "step": 240
+    },
+    {
+      "epoch": 1.0775862068965518,
+      "grad_norm": 0.12985403835773468,
+      "learning_rate": 2.3256088156396868e-05,
+      "loss": 0.0925,
+      "step": 250
+    },
+    {
+      "epoch": 1.1206896551724137,
+      "grad_norm": 0.10140767693519592,
+      "learning_rate": 2.1520672475998373e-05,
+      "loss": 0.086,
+      "step": 260
+    },
+    {
+      "epoch": 1.1637931034482758,
+      "grad_norm": 0.10552043467760086,
+      "learning_rate": 1.980220772955602e-05,
+      "loss": 0.0885,
+      "step": 270
+    },
+    {
+      "epoch": 1.206896551724138,
+      "grad_norm": 0.17615962028503418,
+      "learning_rate": 1.8109066104575023e-05,
+      "loss": 0.0935,
+      "step": 280
+    },
+    {
+      "epoch": 1.25,
+      "grad_norm": 0.16146473586559296,
+      "learning_rate": 1.6449496416858284e-05,
+      "loss": 0.089,
+      "step": 290
+    },
+    {
+      "epoch": 1.293103448275862,
+      "grad_norm": 0.11695656925439835,
+      "learning_rate": 1.4831583923104999e-05,
+      "loss": 0.0934,
+      "step": 300
+    },
+    {
+      "epoch": 1.3362068965517242,
+      "grad_norm": 0.13488854467868805,
+      "learning_rate": 1.3263210930352737e-05,
+      "loss": 0.0902,
+      "step": 310
+    },
+    {
+      "epoch": 1.3793103448275863,
+      "grad_norm": 0.14104993641376495,
+      "learning_rate": 1.175201839416988e-05,
+      "loss": 0.0915,
+      "step": 320
+    },
+    {
+      "epoch": 1.4224137931034484,
+      "grad_norm": 0.11073966324329376,
+      "learning_rate": 1.0305368692688174e-05,
+      "loss": 0.0904,
+      "step": 330
+    },
+    {
+      "epoch": 1.4655172413793103,
+      "grad_norm": 0.12246192991733551,
+      "learning_rate": 8.930309757836517e-06,
+      "loss": 0.086,
+      "step": 340
+    },
+    {
+      "epoch": 1.5086206896551724,
+      "grad_norm": 0.11951974779367447,
+      "learning_rate": 7.633540738525066e-06,
+      "loss": 0.0883,
+      "step": 350
+    },
+    {
+      "epoch": 1.5517241379310345,
+      "grad_norm": 0.11673244833946228,
+      "learning_rate": 6.421379363065142e-06,
+      "loss": 0.0868,
+      "step": 360
+    },
+    {
+      "epoch": 1.5948275862068966,
+      "grad_norm": 0.11525051295757294,
+      "learning_rate": 5.299731159831953e-06,
+      "loss": 0.0889,
+      "step": 370
+    },
+    {
+      "epoch": 1.6379310344827587,
+      "grad_norm": 0.16884970664978027,
+      "learning_rate": 4.274060686123959e-06,
+      "loss": 0.0905,
+      "step": 380
+    },
+    {
+      "epoch": 1.6810344827586206,
+      "grad_norm": 0.12776394188404083,
+      "learning_rate": 3.3493649053890326e-06,
+      "loss": 0.0905,
+      "step": 390
+    },
+    {
+      "epoch": 1.7241379310344827,
+      "grad_norm": 0.13251963257789612,
+      "learning_rate": 2.5301488425208296e-06,
+      "loss": 0.0862,
+      "step": 400
+    },
+    {
+      "epoch": 1.7672413793103448,
+      "grad_norm": 0.14248506724834442,
+      "learning_rate": 1.8204036358303173e-06,
+      "loss": 0.09,
+      "step": 410
+    },
+    {
+      "epoch": 1.8103448275862069,
+      "grad_norm": 0.10634557157754898,
+      "learning_rate": 1.2235870926211619e-06,
+      "loss": 0.0839,
+      "step": 420
+    },
+    {
+      "epoch": 1.853448275862069,
+      "grad_norm": 0.12421922385692596,
+      "learning_rate": 7.426068431000882e-07,
+      "loss": 0.088,
+      "step": 430
+    },
+    {
+      "epoch": 1.896551724137931,
+      "grad_norm": 0.12003956735134125,
+      "learning_rate": 3.7980617469479953e-07,
+      "loss": 0.0932,
+      "step": 440
+    },
+    {
+      "epoch": 1.9396551724137931,
+      "grad_norm": 0.10775435715913773,
+      "learning_rate": 1.3695261579316777e-07,
+      "loss": 0.0842,
+      "step": 450
+    },
+    {
+      "epoch": 1.9827586206896552,
+      "grad_norm": 0.13052473962306976,
+      "learning_rate": 1.522932452260595e-08,
+      "loss": 0.0894,
+      "step": 460
+    },
+    {
+      "epoch": 2.0,
+      "step": 464,
+      "total_flos": 2.4087255599087616e+17,
+      "train_loss": 0.11968068110531774,
+      "train_runtime": 7304.8453,
+      "train_samples_per_second": 1.016,
+      "train_steps_per_second": 0.064
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 464,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
+  "save_steps": 100,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2.4087255599087616e+17,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}