Training in progress, epoch 1, checkpoint

Browse files

Files changed (8) hide show

checkpoint-392/config.json +71 -0
checkpoint-392/model.safetensors +3 -0
checkpoint-392/optimizer.pt +3 -0
checkpoint-392/preprocessor_config.json +14 -0
checkpoint-392/rng_state.pth +3 -0
checkpoint-392/scheduler.pt +3 -0
checkpoint-392/trainer_state.json +588 -0
checkpoint-392/training_args.bin +3 -0

checkpoint-392/config.json ADDED Viewed

	@@ -0,0 +1,71 @@

+{
+  "_name_or_path": "vinai/PhoWhisper-small",
+  "activation_dropout": 0.0,
+  "activation_function": "gelu",
+  "apply_spec_augment": false,
+  "architectures": [
+    "ViSpeechClassification"
+  ],
+  "attention_dropout": 0.0,
+  "begin_suppress_tokens": [
+    220,
+    50257
+  ],
+  "bos_token_id": 50257,
+  "classifier_proj_size": 256,
+  "d_model": 768,
+  "decoder_attention_heads": 12,
+  "decoder_ffn_dim": 3072,
+  "decoder_layerdrop": 0.0,
+  "decoder_layers": 12,
+  "decoder_start_token_id": 50258,
+  "dropout": 0.0,
+  "encoder_attention_heads": 12,
+  "encoder_ffn_dim": 3072,
+  "encoder_layerdrop": 0.0,
+  "encoder_layers": 12,
+  "eos_token_id": 50257,
+  "forced_decoder_ids": null,
+  "id2label": {
+    "0": "male, northern dialect",
+    "1": "male, central dialect",
+    "2": "male, highland central dialect",
+    "3": "male, southern dialect",
+    "4": "female, northern dialect",
+    "5": "female, central dialect",
+    "6": "female, highland central dialect",
+    "7": "female, southern dialect"
+  },
+  "init_std": 0.02,
+  "is_encoder_decoder": true,
+  "label2id": {
+    "female, central dialect": 5,
+    "female, highland central dialect": 6,
+    "female, northern dialect": 4,
+    "female, southern dialect": 7,
+    "male, central dialect": 1,
+    "male, highland central dialect": 2,
+    "male, northern dialect": 0,
+    "male, southern dialect": 3
+  },
+  "mask_feature_length": 10,
+  "mask_feature_min_masks": 0,
+  "mask_feature_prob": 0.0,
+  "mask_time_length": 10,
+  "mask_time_min_masks": 2,
+  "mask_time_prob": 0.05,
+  "max_length": 448,
+  "max_source_positions": 1500,
+  "max_target_positions": 448,
+  "median_filter_width": 7,
+  "model_type": "whisper",
+  "num_hidden_layers": 12,
+  "num_mel_bins": 80,
+  "pad_token_id": 50257,
+  "scale_embedding": false,
+  "torch_dtype": "float32",
+  "transformers_version": "4.41.2",
+  "use_cache": true,
+  "use_weighted_layer_sum": false,
+  "vocab_size": 51865
+}

checkpoint-392/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:eb59ab58bb5ff3eafa91c958ea9b8f42f1fa8b72ed927daddf1923795ef1d594
+size 358419776

checkpoint-392/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:89b0a204171bf2a714b715d9823a351f76897f2b2a388ea04d12e5451646dce2
+size 11572486

checkpoint-392/preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+  "chunk_length": 30,
+  "feature_extractor_type": "WhisperFeatureExtractor",
+  "feature_size": 80,
+  "hop_length": 160,
+  "n_fft": 400,
+  "n_samples": 480000,
+  "nb_max_frames": 3000,
+  "padding_side": "right",
+  "padding_value": 0.0,
+  "processor_class": "WhisperProcessor",
+  "return_attention_mask": false,
+  "sampling_rate": 16000
+}

checkpoint-392/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ac4f2223ae20a15f214a3351b5b55f85f2c47207d3b172dfffc1d6e3c643d1ed
+size 14244

checkpoint-392/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d88888ca2b44ec47bab12de6ac3870d08ad1350b6518691df634bee808eee7a1
+size 1064

checkpoint-392/trainer_state.json ADDED Viewed

	@@ -0,0 +1,588 @@

+{
+  "best_metric": 0.0857774206508638,
+  "best_model_checkpoint": "PhoWhisper-small-vispeech-classifier-v3/checkpoint-392",
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 392,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.012755102040816327,
+      "grad_norm": 48589.94921875,
+      "learning_rate": 6.377551020408164e-08,
+      "loss": 2.0884,
+      "step": 5
+    },
+    {
+      "epoch": 0.025510204081632654,
+      "grad_norm": 50532.125,
+      "learning_rate": 1.2755102040816328e-07,
+      "loss": 2.0887,
+      "step": 10
+    },
+    {
+      "epoch": 0.03826530612244898,
+      "grad_norm": 50246.4296875,
+      "learning_rate": 1.913265306122449e-07,
+      "loss": 2.0885,
+      "step": 15
+    },
+    {
+      "epoch": 0.05102040816326531,
+      "grad_norm": 57370.44140625,
+      "learning_rate": 2.5510204081632656e-07,
+      "loss": 2.0908,
+      "step": 20
+    },
+    {
+      "epoch": 0.06377551020408163,
+      "grad_norm": 40133.53125,
+      "learning_rate": 3.188775510204082e-07,
+      "loss": 2.0889,
+      "step": 25
+    },
+    {
+      "epoch": 0.07653061224489796,
+      "grad_norm": 58377.3671875,
+      "learning_rate": 3.826530612244898e-07,
+      "loss": 2.0874,
+      "step": 30
+    },
+    {
+      "epoch": 0.08928571428571429,
+      "grad_norm": 50888.7421875,
+      "learning_rate": 4.4642857142857147e-07,
+      "loss": 2.088,
+      "step": 35
+    },
+    {
+      "epoch": 0.10204081632653061,
+      "grad_norm": 44107.75390625,
+      "learning_rate": 5.102040816326531e-07,
+      "loss": 2.0872,
+      "step": 40
+    },
+    {
+      "epoch": 0.11479591836734694,
+      "grad_norm": 62939.18359375,
+      "learning_rate": 5.739795918367347e-07,
+      "loss": 2.0873,
+      "step": 45
+    },
+    {
+      "epoch": 0.12755102040816327,
+      "grad_norm": 42077.53515625,
+      "learning_rate": 6.377551020408164e-07,
+      "loss": 2.0852,
+      "step": 50
+    },
+    {
+      "epoch": 0.14030612244897958,
+      "grad_norm": 50233.00390625,
+      "learning_rate": 7.015306122448979e-07,
+      "loss": 2.0861,
+      "step": 55
+    },
+    {
+      "epoch": 0.15306122448979592,
+      "grad_norm": 60839.4765625,
+      "learning_rate": 7.653061224489796e-07,
+      "loss": 2.0856,
+      "step": 60
+    },
+    {
+      "epoch": 0.16581632653061223,
+      "grad_norm": 57499.3515625,
+      "learning_rate": 8.290816326530612e-07,
+      "loss": 2.0838,
+      "step": 65
+    },
+    {
+      "epoch": 0.17857142857142858,
+      "grad_norm": 65030.765625,
+      "learning_rate": 8.928571428571429e-07,
+      "loss": 2.083,
+      "step": 70
+    },
+    {
+      "epoch": 0.1913265306122449,
+      "grad_norm": 53192.35546875,
+      "learning_rate": 9.566326530612244e-07,
+      "loss": 2.0841,
+      "step": 75
+    },
+    {
+      "epoch": 0.20408163265306123,
+      "grad_norm": 52991.80078125,
+      "learning_rate": 1.0204081632653063e-06,
+      "loss": 2.0817,
+      "step": 80
+    },
+    {
+      "epoch": 0.21683673469387754,
+      "grad_norm": 48656.234375,
+      "learning_rate": 1.0841836734693879e-06,
+      "loss": 2.0813,
+      "step": 85
+    },
+    {
+      "epoch": 0.22959183673469388,
+      "grad_norm": 63459.2421875,
+      "learning_rate": 1.1479591836734695e-06,
+      "loss": 2.081,
+      "step": 90
+    },
+    {
+      "epoch": 0.2423469387755102,
+      "grad_norm": 47800.3515625,
+      "learning_rate": 1.211734693877551e-06,
+      "loss": 2.0775,
+      "step": 95
+    },
+    {
+      "epoch": 0.25510204081632654,
+      "grad_norm": 64353.37890625,
+      "learning_rate": 1.2755102040816329e-06,
+      "loss": 2.0772,
+      "step": 100
+    },
+    {
+      "epoch": 0.26785714285714285,
+      "grad_norm": 59652.29296875,
+      "learning_rate": 1.3392857142857143e-06,
+      "loss": 2.0779,
+      "step": 105
+    },
+    {
+      "epoch": 0.28061224489795916,
+      "grad_norm": 44752.69140625,
+      "learning_rate": 1.4030612244897959e-06,
+      "loss": 2.0764,
+      "step": 110
+    },
+    {
+      "epoch": 0.29336734693877553,
+      "grad_norm": 49883.0,
+      "learning_rate": 1.4668367346938777e-06,
+      "loss": 2.0763,
+      "step": 115
+    },
+    {
+      "epoch": 0.30612244897959184,
+      "grad_norm": 65950.28125,
+      "learning_rate": 1.5306122448979593e-06,
+      "loss": 2.0739,
+      "step": 120
+    },
+    {
+      "epoch": 0.31887755102040816,
+      "grad_norm": 61354.83203125,
+      "learning_rate": 1.5943877551020409e-06,
+      "loss": 2.0706,
+      "step": 125
+    },
+    {
+      "epoch": 0.33163265306122447,
+      "grad_norm": 53169.91796875,
+      "learning_rate": 1.6581632653061225e-06,
+      "loss": 2.0709,
+      "step": 130
+    },
+    {
+      "epoch": 0.34438775510204084,
+      "grad_norm": 55883.31640625,
+      "learning_rate": 1.7219387755102043e-06,
+      "loss": 2.0695,
+      "step": 135
+    },
+    {
+      "epoch": 0.35714285714285715,
+      "grad_norm": 38134.765625,
+      "learning_rate": 1.7857142857142859e-06,
+      "loss": 2.0675,
+      "step": 140
+    },
+    {
+      "epoch": 0.36989795918367346,
+      "grad_norm": 53920.7265625,
+      "learning_rate": 1.8494897959183675e-06,
+      "loss": 2.0659,
+      "step": 145
+    },
+    {
+      "epoch": 0.3826530612244898,
+      "grad_norm": 68777.28125,
+      "learning_rate": 1.913265306122449e-06,
+      "loss": 2.0638,
+      "step": 150
+    },
+    {
+      "epoch": 0.39540816326530615,
+      "grad_norm": 46247.8984375,
+      "learning_rate": 1.977040816326531e-06,
+      "loss": 2.0656,
+      "step": 155
+    },
+    {
+      "epoch": 0.40816326530612246,
+      "grad_norm": 49636.04296875,
+      "learning_rate": 2.0408163265306125e-06,
+      "loss": 2.0621,
+      "step": 160
+    },
+    {
+      "epoch": 0.42091836734693877,
+      "grad_norm": 51880.3515625,
+      "learning_rate": 2.104591836734694e-06,
+      "loss": 2.057,
+      "step": 165
+    },
+    {
+      "epoch": 0.4336734693877551,
+      "grad_norm": 52808.734375,
+      "learning_rate": 2.1683673469387757e-06,
+      "loss": 2.0585,
+      "step": 170
+    },
+    {
+      "epoch": 0.44642857142857145,
+      "grad_norm": 46398.125,
+      "learning_rate": 2.2321428571428573e-06,
+      "loss": 2.0505,
+      "step": 175
+    },
+    {
+      "epoch": 0.45918367346938777,
+      "grad_norm": 59347.2578125,
+      "learning_rate": 2.295918367346939e-06,
+      "loss": 2.0487,
+      "step": 180
+    },
+    {
+      "epoch": 0.4719387755102041,
+      "grad_norm": 58343.73828125,
+      "learning_rate": 2.3596938775510205e-06,
+      "loss": 2.0563,
+      "step": 185
+    },
+    {
+      "epoch": 0.4846938775510204,
+      "grad_norm": 43836.42578125,
+      "learning_rate": 2.423469387755102e-06,
+      "loss": 2.0486,
+      "step": 190
+    },
+    {
+      "epoch": 0.49744897959183676,
+      "grad_norm": 69661.5859375,
+      "learning_rate": 2.487244897959184e-06,
+      "loss": 2.0443,
+      "step": 195
+    },
+    {
+      "epoch": 0.5102040816326531,
+      "grad_norm": 53888.24609375,
+      "learning_rate": 2.5510204081632657e-06,
+      "loss": 2.0475,
+      "step": 200
+    },
+    {
+      "epoch": 0.5229591836734694,
+      "grad_norm": 50369.1640625,
+      "learning_rate": 2.6147959183673473e-06,
+      "loss": 2.035,
+      "step": 205
+    },
+    {
+      "epoch": 0.5357142857142857,
+      "grad_norm": 62733.9609375,
+      "learning_rate": 2.6785714285714285e-06,
+      "loss": 2.0354,
+      "step": 210
+    },
+    {
+      "epoch": 0.548469387755102,
+      "grad_norm": 48340.67578125,
+      "learning_rate": 2.74234693877551e-06,
+      "loss": 2.0373,
+      "step": 215
+    },
+    {
+      "epoch": 0.5612244897959183,
+      "grad_norm": 50353.734375,
+      "learning_rate": 2.8061224489795917e-06,
+      "loss": 2.0389,
+      "step": 220
+    },
+    {
+      "epoch": 0.5739795918367347,
+      "grad_norm": 62351.765625,
+      "learning_rate": 2.869897959183674e-06,
+      "loss": 2.0263,
+      "step": 225
+    },
+    {
+      "epoch": 0.5867346938775511,
+      "grad_norm": 73855.609375,
+      "learning_rate": 2.9336734693877553e-06,
+      "loss": 2.0269,
+      "step": 230
+    },
+    {
+      "epoch": 0.5994897959183674,
+      "grad_norm": 84417.859375,
+      "learning_rate": 2.997448979591837e-06,
+      "loss": 2.0311,
+      "step": 235
+    },
+    {
+      "epoch": 0.6122448979591837,
+      "grad_norm": 57347.55078125,
+      "learning_rate": 3.0612244897959185e-06,
+      "loss": 2.0239,
+      "step": 240
+    },
+    {
+      "epoch": 0.625,
+      "grad_norm": 65611.1875,
+      "learning_rate": 3.125e-06,
+      "loss": 2.0064,
+      "step": 245
+    },
+    {
+      "epoch": 0.6377551020408163,
+      "grad_norm": 70810.75,
+      "learning_rate": 3.1887755102040818e-06,
+      "loss": 2.0224,
+      "step": 250
+    },
+    {
+      "epoch": 0.6505102040816326,
+      "grad_norm": 46479.8828125,
+      "learning_rate": 3.2525510204081634e-06,
+      "loss": 2.0057,
+      "step": 255
+    },
+    {
+      "epoch": 0.6632653061224489,
+      "grad_norm": 34124.89453125,
+      "learning_rate": 3.316326530612245e-06,
+      "loss": 2.0135,
+      "step": 260
+    },
+    {
+      "epoch": 0.6760204081632653,
+      "grad_norm": 53196.58203125,
+      "learning_rate": 3.3801020408163266e-06,
+      "loss": 2.0087,
+      "step": 265
+    },
+    {
+      "epoch": 0.6887755102040817,
+      "grad_norm": 64588.515625,
+      "learning_rate": 3.4438775510204086e-06,
+      "loss": 2.0006,
+      "step": 270
+    },
+    {
+      "epoch": 0.701530612244898,
+      "grad_norm": 73780.546875,
+      "learning_rate": 3.50765306122449e-06,
+      "loss": 1.9965,
+      "step": 275
+    },
+    {
+      "epoch": 0.7142857142857143,
+      "grad_norm": 63101.05859375,
+      "learning_rate": 3.5714285714285718e-06,
+      "loss": 2.0005,
+      "step": 280
+    },
+    {
+      "epoch": 0.7270408163265306,
+      "grad_norm": 78616.109375,
+      "learning_rate": 3.6352040816326534e-06,
+      "loss": 1.9904,
+      "step": 285
+    },
+    {
+      "epoch": 0.7397959183673469,
+      "grad_norm": 79120.6484375,
+      "learning_rate": 3.698979591836735e-06,
+      "loss": 1.9891,
+      "step": 290
+    },
+    {
+      "epoch": 0.7525510204081632,
+      "grad_norm": 92711.09375,
+      "learning_rate": 3.7627551020408166e-06,
+      "loss": 1.9611,
+      "step": 295
+    },
+    {
+      "epoch": 0.7653061224489796,
+      "grad_norm": 77935.4765625,
+      "learning_rate": 3.826530612244898e-06,
+      "loss": 1.9746,
+      "step": 300
+    },
+    {
+      "epoch": 0.7780612244897959,
+      "grad_norm": 85825.375,
+      "learning_rate": 3.89030612244898e-06,
+      "loss": 1.9753,
+      "step": 305
+    },
+    {
+      "epoch": 0.7908163265306123,
+      "grad_norm": 65178.671875,
+      "learning_rate": 3.954081632653062e-06,
+      "loss": 1.9534,
+      "step": 310
+    },
+    {
+      "epoch": 0.8035714285714286,
+      "grad_norm": 71973.671875,
+      "learning_rate": 4.017857142857143e-06,
+      "loss": 1.9474,
+      "step": 315
+    },
+    {
+      "epoch": 0.8163265306122449,
+      "grad_norm": 88983.125,
+      "learning_rate": 4.081632653061225e-06,
+      "loss": 1.9492,
+      "step": 320
+    },
+    {
+      "epoch": 0.8290816326530612,
+      "grad_norm": 76345.84375,
+      "learning_rate": 4.145408163265306e-06,
+      "loss": 1.9513,
+      "step": 325
+    },
+    {
+      "epoch": 0.8418367346938775,
+      "grad_norm": 64801.640625,
+      "learning_rate": 4.209183673469388e-06,
+      "loss": 1.9583,
+      "step": 330
+    },
+    {
+      "epoch": 0.8545918367346939,
+      "grad_norm": 72903.8203125,
+      "learning_rate": 4.272959183673469e-06,
+      "loss": 1.9579,
+      "step": 335
+    },
+    {
+      "epoch": 0.8673469387755102,
+      "grad_norm": 65284.796875,
+      "learning_rate": 4.336734693877551e-06,
+      "loss": 1.9272,
+      "step": 340
+    },
+    {
+      "epoch": 0.8801020408163265,
+      "grad_norm": 65237.38671875,
+      "learning_rate": 4.400510204081633e-06,
+      "loss": 1.9065,
+      "step": 345
+    },
+    {
+      "epoch": 0.8928571428571429,
+      "grad_norm": 79606.9296875,
+      "learning_rate": 4.464285714285715e-06,
+      "loss": 1.9013,
+      "step": 350
+    },
+    {
+      "epoch": 0.9056122448979592,
+      "grad_norm": 76137.6328125,
+      "learning_rate": 4.528061224489797e-06,
+      "loss": 1.8892,
+      "step": 355
+    },
+    {
+      "epoch": 0.9183673469387755,
+      "grad_norm": 40717.60546875,
+      "learning_rate": 4.591836734693878e-06,
+      "loss": 1.9072,
+      "step": 360
+    },
+    {
+      "epoch": 0.9311224489795918,
+      "grad_norm": 110416.125,
+      "learning_rate": 4.65561224489796e-06,
+      "loss": 1.9024,
+      "step": 365
+    },
+    {
+      "epoch": 0.9438775510204082,
+      "grad_norm": 99101.6875,
+      "learning_rate": 4.719387755102041e-06,
+      "loss": 1.8979,
+      "step": 370
+    },
+    {
+      "epoch": 0.9566326530612245,
+      "grad_norm": 86707.7109375,
+      "learning_rate": 4.783163265306123e-06,
+      "loss": 1.878,
+      "step": 375
+    },
+    {
+      "epoch": 0.9693877551020408,
+      "grad_norm": 85074.75,
+      "learning_rate": 4.846938775510204e-06,
+      "loss": 1.871,
+      "step": 380
+    },
+    {
+      "epoch": 0.9821428571428571,
+      "grad_norm": 47668.7109375,
+      "learning_rate": 4.910714285714286e-06,
+      "loss": 1.8762,
+      "step": 385
+    },
+    {
+      "epoch": 0.9948979591836735,
+      "grad_norm": 133846.6875,
+      "learning_rate": 4.974489795918368e-06,
+      "loss": 1.8344,
+      "step": 390
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.0857774206508638,
+      "eval_loss": 1.9574071168899536,
+      "eval_runtime": 313.0247,
+      "eval_samples_per_second": 15.903,
+      "eval_steps_per_second": 0.53,
+      "step": 392
+    }
+  ],
+  "logging_steps": 5,
+  "max_steps": 3920,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.49679069336576e+18,
+  "train_batch_size": 30,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-392/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:62d4afa24b2e5344ef9e2ace3afc7237855a36ba40c6701cd39cbf7db81b4cdc
+size 5176