mbort1 committed (verified)
Commit 1d81519 · Parent: f55c385

Training in progress, step 1000, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:1071652c84ae8cff079bacf083815e29a2c6ce551b00dcf67c37b264fc624e77
+ oid sha256:4f443882242ddbd40941209e4a0e6877bf6a845e499c111ee7646474bc2d5d67
  size 83945296
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:c345fef49cd111d959a45fa7def0661f57b9a328cf327bf439db1ce039df3a3b
+ oid sha256:7e048ad67b3266263900fe71524be3e7dc7a47780658388e5d17114aae41ff4d
  size 168155346
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:af90d9bd53ef6e01e741f740accadc59008352f0e7d7bbe666104823a9634cc6
+ oid sha256:61499f823566f61d141610b35eff248a50a427b5e7b93ed444e378c82d3f7bf9
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:520939a2b861396046dd9c00e1116122d631161038719e48262b93fdca69303b
+ oid sha256:c7b5bf190dc871967c45091d9f1ab233b2d2ed62baca21fee5dfedb5718ffa5d
  size 1064
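
Each of the pointer files above follows the Git LFS spec: the repository tracks only the version, oid sha256 and size fields, while the binary payload lives in LFS storage. As a rough sanity check, a locally fetched checkpoint file can be hashed and compared against the oid in its pointer. This is a minimal sketch; the local path assumes the repository has been cloned and the LFS objects pulled (e.g. via git lfs pull), which is not shown in this commit.

import hashlib
from pathlib import Path

def lfs_sha256(path: str, chunk_size: int = 1 << 20) -> str:
    # Stream the file and return its sha256 hex digest (the LFS "oid").
    h = hashlib.sha256()
    with Path(path).open("rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

# Assumed local path after fetching LFS objects; compare against the new pointer oid above.
digest = lfs_sha256("last-checkpoint/adapter_model.safetensors")
print(digest == "4f443882242ddbd40941209e4a0e6877bf6a845e499c111ee7646474bc2d5d67")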
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 0.2161431948665991,
+ "epoch": 0.2701789935832489,
  "eval_steps": 1000,
- "global_step": 800,
+ "global_step": 1000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -127,6 +127,42 @@
  "learning_rate": 2.339555568810221e-05,
  "loss": 0.8341,
  "step": 800
+ },
+ {
+ "epoch": 0.22965214454576158,
+ "grad_norm": 1.3319884538650513,
+ "learning_rate": 1.339745962155613e-05,
+ "loss": 0.8472,
+ "step": 850
+ },
+ {
+ "epoch": 0.24316109422492402,
+ "grad_norm": 1.5228185653686523,
+ "learning_rate": 6.030737921409169e-06,
+ "loss": 0.8233,
+ "step": 900
+ },
+ {
+ "epoch": 0.25667004390408643,
+ "grad_norm": 1.154761791229248,
+ "learning_rate": 1.5192246987791981e-06,
+ "loss": 0.8262,
+ "step": 950
+ },
+ {
+ "epoch": 0.2701789935832489,
+ "grad_norm": 1.1170167922973633,
+ "learning_rate": 0.0,
+ "loss": 0.8293,
+ "step": 1000
+ },
+ {
+ "epoch": 0.2701789935832489,
+ "eval_loss": 0.8305127024650574,
+ "eval_runtime": 807.4769,
+ "eval_samples_per_second": 7.72,
+ "eval_steps_per_second": 0.966,
+ "step": 1000
  }
  ],
  "logging_steps": 50,
@@ -141,12 +177,12 @@
  "should_evaluate": false,
  "should_log": false,
  "should_save": true,
- "should_training_stop": false
+ "should_training_stop": true
  },
  "attributes": {}
  }
  },
- "total_flos": 1.246236390400721e+18,
+ "total_flos": 1.5573330969388646e+18,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null