ViswanthSai commited on
Commit
c5f706f
·
verified ·
1 Parent(s): 6bd7404

Training in progress, step 600, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c2e2114ed459673c5d749b3341367ae547a46e47116edf106c055bdfeeb40c75
3
  size 241895584
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19f3eef8107983dcdb7b94a197dbed9a47e97bf01ac4eb936a9e72d9cea52879
3
  size 241895584
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a75a00d2da06bb833cc02da4ff6c961ed115d1cbf6f02daa086895025ed79392
3
  size 123395956
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:519ee3684a19a9f21410539bf0e644d7e22660a5cb4b39c9cd795c4b68b2be8f
3
  size 123395956
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bb9e13c66b191c8148e2133ca2784e9c043469b5ff2bff6f0d53e0b29915063b
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76875aafaba767c932aae353ebc72177db545cf762563fd52735ca887bf41e31
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c801982aae9be06d302403c1fff693e53dedf89c1d3b689ee29fedad84a96d23
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e32b8db4f4cba2b7b0157ab60a064591c3802a9067529d5fddbb1c6c1e99660b
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ad56050a9daa3d98b30fc2273c514f21e48c6fc2b3fdd6f82fd1518d879c7d05
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f35971308ea7f5d5ad9cac94f24500885f98d8e3244f123fe54daa3c39fed470
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 400,
3
- "best_metric": 0.6293139457702637,
4
- "best_model_checkpoint": "outputs/checkpoint-400",
5
- "epoch": 0.6304176516942475,
6
  "eval_steps": 200,
7
- "global_step": 400,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -136,6 +136,70 @@
136
  "eval_samples_per_second": 6.564,
137
  "eval_steps_per_second": 1.646,
138
  "step": 400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
  }
140
  ],
141
  "logging_steps": 25,
@@ -155,7 +219,7 @@
155
  "attributes": {}
156
  }
157
  },
158
- "total_flos": 4.752881759404032e+16,
159
  "train_batch_size": 4,
160
  "trial_name": null,
161
  "trial_params": null
 
1
  {
2
+ "best_global_step": 600,
3
+ "best_metric": 0.619783341884613,
4
+ "best_model_checkpoint": "outputs/checkpoint-600",
5
+ "epoch": 0.9456264775413712,
6
  "eval_steps": 200,
7
+ "global_step": 600,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
136
  "eval_samples_per_second": 6.564,
137
  "eval_steps_per_second": 1.646,
138
  "step": 400
139
+ },
140
+ {
141
+ "epoch": 0.6698187549251379,
142
+ "grad_norm": 0.26333087682724,
143
+ "learning_rate": 0.00014257792915650728,
144
+ "loss": 0.6544,
145
+ "step": 425
146
+ },
147
+ {
148
+ "epoch": 0.7092198581560284,
149
+ "grad_norm": 0.21821129322052002,
150
+ "learning_rate": 0.00013452981989985348,
151
+ "loss": 0.6227,
152
+ "step": 450
153
+ },
154
+ {
155
+ "epoch": 0.7486209613869188,
156
+ "grad_norm": 0.25767773389816284,
157
+ "learning_rate": 0.00012621891786408648,
158
+ "loss": 0.6508,
159
+ "step": 475
160
+ },
161
+ {
162
+ "epoch": 0.7880220646178093,
163
+ "grad_norm": 0.36362728476524353,
164
+ "learning_rate": 0.00011770847403195834,
165
+ "loss": 0.6246,
166
+ "step": 500
167
+ },
168
+ {
169
+ "epoch": 0.8274231678486997,
170
+ "grad_norm": 0.2877989113330841,
171
+ "learning_rate": 0.00010906325801977804,
172
+ "loss": 0.637,
173
+ "step": 525
174
+ },
175
+ {
176
+ "epoch": 0.8668242710795903,
177
+ "grad_norm": 0.24531076848506927,
178
+ "learning_rate": 0.00010034906514152238,
179
+ "loss": 0.6278,
180
+ "step": 550
181
+ },
182
+ {
183
+ "epoch": 0.9062253743104807,
184
+ "grad_norm": 0.29258039593696594,
185
+ "learning_rate": 9.163221566676847e-05,
186
+ "loss": 0.6357,
187
+ "step": 575
188
+ },
189
+ {
190
+ "epoch": 0.9456264775413712,
191
+ "grad_norm": 0.28367385268211365,
192
+ "learning_rate": 8.297905008339677e-05,
193
+ "loss": 0.6301,
194
+ "step": 600
195
+ },
196
+ {
197
+ "epoch": 0.9456264775413712,
198
+ "eval_loss": 0.619783341884613,
199
+ "eval_runtime": 162.108,
200
+ "eval_samples_per_second": 6.594,
201
+ "eval_steps_per_second": 1.653,
202
+ "step": 600
203
  }
204
  ],
205
  "logging_steps": 25,
 
219
  "attributes": {}
220
  }
221
  },
222
+ "total_flos": 7.119511154412749e+16,
223
  "train_batch_size": 4,
224
  "trial_name": null,
225
  "trial_params": null