Next training iteration (185k)

Browse files

Files changed (9)

added_tokens.json +1 -1
config.json +1 -1
pytorch_model.bin +1 -1
rng_state.pth +1 -1
scheduler.pt +1 -1
tokenizer.json +0 -0
tokenizer_config.json +1 -1
trainer_state.json +136 -5
training_args.bin +2 -2

added_tokens.json CHANGED Viewed

@@ -1 +1 @@

- {"SHORT_HYPHENATED_TOKEN": 32105, "END_SPONSOR_TOKEN": 32113, "EXTRACT_SEGMENTS: ": 32100, "[Music]": 32107, "NUMBER_PERCENTAGE_TOKEN": 32103, "LONG_WORD_TOKEN": 32106, "HYPHENATED_URL_TOKEN": 32102, "START_INTERACTION_TOKEN": 32116, "URL_TOKEN": 32101, "END_INTERACTION_TOKEN": 32117, "END_SELFPROMO_TOKEN": 32115, "NUMBER_TOKEN": 32104, "NO_SEGMENT_TOKEN": 32111, "PROFANITY_TOKEN": 32110, "BETWEEN_SEGMENTS_TOKEN": 32118, "START_SELFPROMO_TOKEN": 32114, "[Laughter]": 32109, "[Applause]": 32108, "START_SPONSOR_TOKEN": 32112}

+ {"START_INTERACTION_TOKEN": 32116, "LONG_WORD_TOKEN": 32106, "NUMBER_PERCENTAGE_TOKEN": 32103, "END_INTERACTION_TOKEN": 32117, "START_SELFPROMO_TOKEN": 32114, "[Music]": 32107, "END_SPONSOR_TOKEN": 32113, "URL_TOKEN": 32101, "NUMBER_TOKEN": 32104, "PROFANITY_TOKEN": 32110, "HYPHENATED_URL_TOKEN": 32102, "END_SELFPROMO_TOKEN": 32115, "NO_SEGMENT_TOKEN": 32111, "SHORT_HYPHENATED_TOKEN": 32105, "BETWEEN_SEGMENTS_TOKEN": 32118, "START_SPONSOR_TOKEN": 32112, "EXTRACT_SEGMENTS: ": 32100, "[Applause]": 32108, "[Laughter]": 32109}

config.json CHANGED Viewed

@@ -22,7 +22,7 @@
   "relative_attention_num_buckets": 32,
   "tie_word_embeddings": false,
   "torch_dtype": "float32",
-  "transformers_version": "4.15.0",
   "use_cache": true,
   "vocab_size": 32119
 }

   "relative_attention_num_buckets": 32,
   "tie_word_embeddings": false,
   "torch_dtype": "float32",
+  "transformers_version": "4.16.1",
   "use_cache": true,
   "vocab_size": 32119
 }

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:71bb817c2eb93b2299b2db3e9a4fdf54230ad4f8af14493ca0539096676da785
 size 990383053

 version https://git-lfs.github.com/spec/v1
+oid sha256:a23fa3d63f7b869dd2fcde0ae59b6f84a4d6a7c2e5e7cea918425d09eec82f7a
 size 990383053

rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5f5898f076f1662c347f647ca9305dacb1097aa523e328bb84cc7c4a92dda75a
 size 14503

 version https://git-lfs.github.com/spec/v1
+oid sha256:c730413130fc63756f54d5820e7a7171dce6b48472fe73812d2eeee7d3ecc44d
 size 14503

scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c2e5c883e4333a69700fe4d098d3261cf9eac2c32334388c85d05b1fa6f3a483
 size 623

 version https://git-lfs.github.com/spec/v1
+oid sha256:8fc17a095b50cb3c162e9244c249edb34b9632ba405100031ff659035a9789a7
 size 623

tokenizer.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json CHANGED Viewed

@@ -1 +1 @@

- {"eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>", "extra_ids": 100, "additional_special_tokens": ["<extra_id_0>", "<extra_id_1>", "<extra_id_2>", "<extra_id_3>", "<extra_id_4>", "<extra_id_5>", "<extra_id_6>", "<extra_id_7>", "<extra_id_8>", "<extra_id_9>", "<extra_id_10>", "<extra_id_11>", "<extra_id_12>", "<extra_id_13>", "<extra_id_14>", "<extra_id_15>", "<extra_id_16>", "<extra_id_17>", "<extra_id_18>", "<extra_id_19>", "<extra_id_20>", "<extra_id_21>", "<extra_id_22>", "<extra_id_23>", "<extra_id_24>", "<extra_id_25>", "<extra_id_26>", "<extra_id_27>", "<extra_id_28>", "<extra_id_29>", "<extra_id_30>", "<extra_id_31>", "<extra_id_32>", "<extra_id_33>", "<extra_id_34>", "<extra_id_35>", "<extra_id_36>", "<extra_id_37>", "<extra_id_38>", "<extra_id_39>", "<extra_id_40>", "<extra_id_41>", "<extra_id_42>", "<extra_id_43>", "<extra_id_44>", "<extra_id_45>", "<extra_id_46>", "<extra_id_47>", "<extra_id_48>", "<extra_id_49>", "<extra_id_50>", "<extra_id_51>", "<extra_id_52>", "<extra_id_53>", "<extra_id_54>", "<extra_id_55>", "<extra_id_56>", "<extra_id_57>", "<extra_id_58>", "<extra_id_59>", "<extra_id_60>", "<extra_id_61>", "<extra_id_62>", "<extra_id_63>", "<extra_id_64>", "<extra_id_65>", "<extra_id_66>", "<extra_id_67>", "<extra_id_68>", "<extra_id_69>", "<extra_id_70>", "<extra_id_71>", "<extra_id_72>", "<extra_id_73>", "<extra_id_74>", "<extra_id_75>", "<extra_id_76>", "<extra_id_77>", "<extra_id_78>", "<extra_id_79>", "<extra_id_80>", "<extra_id_81>", "<extra_id_82>", "<extra_id_83>", "<extra_id_84>", "<extra_id_85>", "<extra_id_86>", "<extra_id_87>", "<extra_id_88>", "<extra_id_89>", "<extra_id_90>", "<extra_id_91>", "<extra_id_92>", "<extra_id_93>", "<extra_id_94>", "<extra_id_95>", "<extra_id_96>", "<extra_id_97>", "<extra_id_98>", "<extra_id_99>"], "model_max_length": 512, "name_or_path": "google/t5-v1_1-base", "special_tokens_map_file": 
"/root/.cache/huggingface/transformers/76bf19bfedb85afbe644966ca9ab7b0404d753a41bf601115bced39f825ffa9c.c94798918c92ded6aeef2d2f0e666d2cc4145eca1aa6e1336fde07f2e13e2f46", "sp_model_kwargs": {}, "tokenizer_class": "T5Tokenizer"}

+ {"eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>", "extra_ids": 100, "additional_special_tokens": ["<extra_id_0>", "<extra_id_1>", "<extra_id_2>", "<extra_id_3>", "<extra_id_4>", "<extra_id_5>", "<extra_id_6>", "<extra_id_7>", "<extra_id_8>", "<extra_id_9>", "<extra_id_10>", "<extra_id_11>", "<extra_id_12>", "<extra_id_13>", "<extra_id_14>", "<extra_id_15>", "<extra_id_16>", "<extra_id_17>", "<extra_id_18>", "<extra_id_19>", "<extra_id_20>", "<extra_id_21>", "<extra_id_22>", "<extra_id_23>", "<extra_id_24>", "<extra_id_25>", "<extra_id_26>", "<extra_id_27>", "<extra_id_28>", "<extra_id_29>", "<extra_id_30>", "<extra_id_31>", "<extra_id_32>", "<extra_id_33>", "<extra_id_34>", "<extra_id_35>", "<extra_id_36>", "<extra_id_37>", "<extra_id_38>", "<extra_id_39>", "<extra_id_40>", "<extra_id_41>", "<extra_id_42>", "<extra_id_43>", "<extra_id_44>", "<extra_id_45>", "<extra_id_46>", "<extra_id_47>", "<extra_id_48>", "<extra_id_49>", "<extra_id_50>", "<extra_id_51>", "<extra_id_52>", "<extra_id_53>", "<extra_id_54>", "<extra_id_55>", "<extra_id_56>", "<extra_id_57>", "<extra_id_58>", "<extra_id_59>", "<extra_id_60>", "<extra_id_61>", "<extra_id_62>", "<extra_id_63>", "<extra_id_64>", "<extra_id_65>", "<extra_id_66>", "<extra_id_67>", "<extra_id_68>", "<extra_id_69>", "<extra_id_70>", "<extra_id_71>", "<extra_id_72>", "<extra_id_73>", "<extra_id_74>", "<extra_id_75>", "<extra_id_76>", "<extra_id_77>", "<extra_id_78>", "<extra_id_79>", "<extra_id_80>", "<extra_id_81>", "<extra_id_82>", "<extra_id_83>", "<extra_id_84>", "<extra_id_85>", "<extra_id_86>", "<extra_id_87>", "<extra_id_88>", "<extra_id_89>", "<extra_id_90>", "<extra_id_91>", "<extra_id_92>", "<extra_id_93>", "<extra_id_94>", "<extra_id_95>", "<extra_id_96>", "<extra_id_97>", "<extra_id_98>", "<extra_id_99>"], "model_max_length": 512, "name_or_path": "google/t5-v1_1-base", "max_length": 768, "special_tokens_map_file": 
"/root/.cache/huggingface/transformers/76bf19bfedb85afbe644966ca9ab7b0404d753a41bf601115bced39f825ffa9c.c94798918c92ded6aeef2d2f0e666d2cc4145eca1aa6e1336fde07f2e13e2f46", "sp_model_kwargs": {}, "tokenizer_class": "T5Tokenizer"}

trainer_state.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.1612045201747457,
-  "global_step": 90000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -122,11 +122,142 @@
       "learning_rate": 4.1939773991262716e-05,
       "loss": 0.0966,
       "step": 90000
     }
   ],
-  "max_steps": 558297,
-  "num_train_epochs": 1,
-  "total_flos": 8.885642742554112e+16,
   "trial_name": null,
   "trial_params": null
 }

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.0634139611882645,
+  "global_step": 185000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 4.1939773991262716e-05,
       "loss": 0.0966,
       "step": 90000
+    },
+    {
+      "epoch": 0.17,
+      "learning_rate": 4.149198365744398e-05,
+      "loss": 0.096,
+      "step": 95000
+    },
+    {
+      "epoch": 0.18,
+      "learning_rate": 4.104419332362524e-05,
+      "loss": 0.0891,
+      "step": 100000
+    },
+    {
+      "epoch": 0.18,
+      "eval_loss": 0.08249569684267044,
+      "eval_runtime": 3211.3155,
+      "eval_samples_per_second": 19.317,
+      "eval_steps_per_second": 9.659,
+      "step": 100000
+    },
+    {
+      "epoch": 0.19,
+      "learning_rate": 4.0596402989806505e-05,
+      "loss": 0.0897,
+      "step": 105000
+    },
+    {
+      "epoch": 0.2,
+      "learning_rate": 4.0148612655987766e-05,
+      "loss": 0.0895,
+      "step": 110000
+    },
+    {
+      "epoch": 0.21,
+      "learning_rate": 3.970082232216903e-05,
+      "loss": 0.0929,
+      "step": 115000
+    },
+    {
+      "epoch": 0.21,
+      "learning_rate": 3.925303198835029e-05,
+      "loss": 0.0857,
+      "step": 120000
+    },
+    {
+      "epoch": 0.22,
+      "learning_rate": 3.880524165453155e-05,
+      "loss": 0.0879,
+      "step": 125000
+    },
+    {
+      "epoch": 0.23,
+      "learning_rate": 3.835745132071281e-05,
+      "loss": 0.0855,
+      "step": 130000
+    },
+    {
+      "epoch": 0.24,
+      "learning_rate": 3.790966098689408e-05,
+      "loss": 0.0841,
+      "step": 135000
+    },
+    {
+      "epoch": 0.25,
+      "learning_rate": 3.746187065307534e-05,
+      "loss": 0.0866,
+      "step": 140000
+    },
+    {
+      "epoch": 0.26,
+      "learning_rate": 3.70140803192566e-05,
+      "loss": 0.0855,
+      "step": 145000
+    },
+    {
+      "epoch": 0.27,
+      "learning_rate": 3.6566289985437866e-05,
+      "loss": 0.0825,
+      "step": 150000
+    },
+    {
+      "epoch": 0.28,
+      "learning_rate": 3.611849965161912e-05,
+      "loss": 0.082,
+      "step": 155000
+    },
+    {
+      "epoch": 0.29,
+      "learning_rate": 3.567070931780038e-05,
+      "loss": 0.0865,
+      "step": 160000
+    },
+    {
+      "epoch": 0.3,
+      "learning_rate": 3.522291898398165e-05,
+      "loss": 0.0814,
+      "step": 165000
+    },
+    {
+      "epoch": 0.98,
+      "learning_rate": 1.1404396210797388e-06,
+      "loss": 0.0811,
+      "step": 170000
+    },
+    {
+      "epoch": 1.0,
+      "step": 173968,
+      "total_flos": 1.727613380688722e+17,
+      "train_loss": 0.004168222484029031,
+      "train_runtime": 9992.3727,
+      "train_samples_per_second": 34.82,
+      "train_steps_per_second": 17.41
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.997033937275821e-05,
+      "loss": 0.079,
+      "step": 175000
+    },
+    {
+      "epoch": 1.03,
+      "learning_rate": 4.982663478340844e-05,
+      "loss": 0.0806,
+      "step": 180000
+    },
+    {
+      "epoch": 1.06,
+      "learning_rate": 4.968293019405868e-05,
+      "loss": 0.0784,
+      "step": 185000
     }
   ],
+  "max_steps": 1739680,
+  "num_train_epochs": 10,
+  "total_flos": 1.849684070324828e+17,
   "trial_name": null,
   "trial_params": null
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:da11e8af50c38dbf9430e2d7bbfab617719d848605add5ab7ec3aa4da77adca5
-size 3119

 version https://git-lfs.github.com/spec/v1
+oid sha256:cbfa8ba174e9fc1e47a9566b4253523ff02ac3c21b714c241b7469d42c08648c
+size 3183