eldad-akhaumere committed · verified
Commit 6c5e87f · 1 Parent(s): 9dee314

Training in progress, step 10500, checkpoint

last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:44a8be296d9693102c2d75dc7be4c3c4dc69e79ecab2acc7233710ac2661a364
+ oid sha256:85f0e09ecc93465e0a0221ba66dcfb3297029be6496ecd7742a3367c03b2dddf
  size 966995080
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:0af704705ef0da7f09f726648a246c4dd765dd9cbdb61aaf391803c45ff6d1dc
+ oid sha256:7faa3deaf7772fdd5c0b956a58b34d5abc85013a67b67c9e291879541658bcbc
  size 1925064044
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:17b392e7666c071a2d6510caf8f2c7432c070724f46481df44ca2199dfdbb3d3
+ oid sha256:d89bc68118b2bdfe67b2a1e276ef1fcd92b1616b6e5c253bf79def97355e27b5
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:bda99ba66ec317fa211d4111d8e4f978ee38e5bbccdd4109849ebc8aea65894b
+ oid sha256:3ccdac996a7746ff581f40b984dc6b1b8e12cdd0754c04482dcb7dee59b625cb
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": 77.56275148495881,
  "best_model_checkpoint": "./whisper-small-ha-v10/checkpoint-3000",
- "epoch": 63.69426751592356,
+ "epoch": 66.87898089171975,
  "eval_steps": 500,
- "global_step": 10000,
+ "global_step": 10500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -3007,6 +3007,156 @@
  "eval_wer": 81.68231461965894,
  "eval_wer_ortho": 83.7109375,
  "step": 10000
+ },
+ {
+ "epoch": 63.853503184713375,
+ "grad_norm": 2.015751361846924,
+ "learning_rate": 3e-05,
+ "loss": 0.0041,
+ "step": 10025
+ },
+ {
+ "epoch": 64.01273885350318,
+ "grad_norm": 2.141650676727295,
+ "learning_rate": 3e-05,
+ "loss": 0.0128,
+ "step": 10050
+ },
+ {
+ "epoch": 64.171974522293,
+ "grad_norm": 0.3167494237422943,
+ "learning_rate": 3e-05,
+ "loss": 0.0084,
+ "step": 10075
+ },
+ {
+ "epoch": 64.3312101910828,
+ "grad_norm": 1.8153035640716553,
+ "learning_rate": 3e-05,
+ "loss": 0.0086,
+ "step": 10100
+ },
+ {
+ "epoch": 64.49044585987261,
+ "grad_norm": 0.3989582061767578,
+ "learning_rate": 3e-05,
+ "loss": 0.0057,
+ "step": 10125
+ },
+ {
+ "epoch": 64.64968152866243,
+ "grad_norm": 1.5880628824234009,
+ "learning_rate": 3e-05,
+ "loss": 0.0086,
+ "step": 10150
+ },
+ {
+ "epoch": 64.80891719745223,
+ "grad_norm": 0.07060195505619049,
+ "learning_rate": 3e-05,
+ "loss": 0.0079,
+ "step": 10175
+ },
+ {
+ "epoch": 64.96815286624204,
+ "grad_norm": 1.7613017559051514,
+ "learning_rate": 3e-05,
+ "loss": 0.0086,
+ "step": 10200
+ },
+ {
+ "epoch": 65.12738853503184,
+ "grad_norm": 0.047506798058748245,
+ "learning_rate": 3e-05,
+ "loss": 0.0115,
+ "step": 10225
+ },
+ {
+ "epoch": 65.28662420382166,
+ "grad_norm": 1.2768458127975464,
+ "learning_rate": 3e-05,
+ "loss": 0.006,
+ "step": 10250
+ },
+ {
+ "epoch": 65.44585987261146,
+ "grad_norm": 2.424394130706787,
+ "learning_rate": 3e-05,
+ "loss": 0.0291,
+ "step": 10275
+ },
+ {
+ "epoch": 65.60509554140127,
+ "grad_norm": 0.09524156898260117,
+ "learning_rate": 3e-05,
+ "loss": 0.0129,
+ "step": 10300
+ },
+ {
+ "epoch": 65.76433121019109,
+ "grad_norm": 3.080942153930664,
+ "learning_rate": 3e-05,
+ "loss": 0.0076,
+ "step": 10325
+ },
+ {
+ "epoch": 65.92356687898089,
+ "grad_norm": 0.5182892680168152,
+ "learning_rate": 3e-05,
+ "loss": 0.01,
+ "step": 10350
+ },
+ {
+ "epoch": 66.0828025477707,
+ "grad_norm": 4.24620246887207,
+ "learning_rate": 3e-05,
+ "loss": 0.0074,
+ "step": 10375
+ },
+ {
+ "epoch": 66.24203821656052,
+ "grad_norm": 0.21629653871059418,
+ "learning_rate": 3e-05,
+ "loss": 0.0086,
+ "step": 10400
+ },
+ {
+ "epoch": 66.40127388535032,
+ "grad_norm": 0.24145953357219696,
+ "learning_rate": 3e-05,
+ "loss": 0.0112,
+ "step": 10425
+ },
+ {
+ "epoch": 66.56050955414013,
+ "grad_norm": 0.3760283291339874,
+ "learning_rate": 3e-05,
+ "loss": 0.0072,
+ "step": 10450
+ },
+ {
+ "epoch": 66.71974522292993,
+ "grad_norm": 0.07463686168193817,
+ "learning_rate": 3e-05,
+ "loss": 0.0138,
+ "step": 10475
+ },
+ {
+ "epoch": 66.87898089171975,
+ "grad_norm": 2.895296335220337,
+ "learning_rate": 3e-05,
+ "loss": 0.0093,
+ "step": 10500
+ },
+ {
+ "epoch": 66.87898089171975,
+ "eval_loss": 2.654149293899536,
+ "eval_runtime": 146.7787,
+ "eval_samples_per_second": 4.497,
+ "eval_steps_per_second": 0.286,
+ "eval_wer": 80.34106150603564,
+ "eval_wer_ortho": 82.44140625,
+ "step": 10500
  }
  ],
  "logging_steps": 25,
@@ -3035,7 +3185,7 @@
  "attributes": {}
  }
  },
- "total_flos": 4.614249682796544e+19,
+ "total_flos": 4.84477170057216e+19,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null