Kromtao committed on
Commit 00b4e68 · verified · 1 Parent(s): 5b3cf45

Training in progress, step 800, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0d54e282f291a93f6267d6355da12cf995021e0eba4fbce372d3e8d20ebeef24
+oid sha256:57752341747a334acff7e7c528323be22984cd223aa4991ef1082bda00f5174e
 size 80792096
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ea8fea7f9df4ffe845d46f73ae0db4397236e8fde40ed896bfe8a32356e2a21d
+oid sha256:84e4115ebf0a8527b2019b5f89a6a7ac019a87f4f339264b0c1e5bf3e4ecd86c
 size 161815786
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c98b6d1fbc7ac6affec22be72fb3a942a886bb0d0bc66155ec1dc925e7b19b84
+oid sha256:fd4adf102d00d0cae570ac4ad0c98e14f03cf5c3c9e8e538ac52fc78f0a8a58c
 size 14180
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bae4f04e8c27e8d5a131fd03799d376a4ac32337d535dd00c3175dd79c3deccf
+oid sha256:0f7b3a80d33ece2309bf5e8c3963ace6e331a62867cb982ffe0778787e5ad208
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.0641917192682144,
+  "epoch": 0.08558895902428587,
   "eval_steps": 800,
-  "global_step": 600,
+  "global_step": 800,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -99,6 +99,42 @@
       "learning_rate": 3.7651019814126654e-05,
       "loss": 0.8901,
       "step": 600
+    },
+    {
+      "epoch": 0.06954102920723226,
+      "grad_norm": 0.4904666244983673,
+      "learning_rate": 2.181685175319702e-05,
+      "loss": 0.8441,
+      "step": 650
+    },
+    {
+      "epoch": 0.07489033914625014,
+      "grad_norm": 0.5129781365394592,
+      "learning_rate": 9.903113209758096e-06,
+      "loss": 0.8732,
+      "step": 700
+    },
+    {
+      "epoch": 0.080239649085268,
+      "grad_norm": 0.4603961706161499,
+      "learning_rate": 2.5072087818176382e-06,
+      "loss": 0.8404,
+      "step": 750
+    },
+    {
+      "epoch": 0.08558895902428587,
+      "grad_norm": 0.4574665129184723,
+      "learning_rate": 0.0,
+      "loss": 0.8934,
+      "step": 800
+    },
+    {
+      "epoch": 0.08558895902428587,
+      "eval_loss": 0.8988075256347656,
+      "eval_runtime": 1557.4072,
+      "eval_samples_per_second": 10.108,
+      "eval_steps_per_second": 1.264,
+      "step": 800
     }
   ],
   "logging_steps": 50,
@@ -113,12 +149,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
+        "should_training_stop": true
       },
      "attributes": {}
     }
   },
-  "total_flos": 1.1877807290151076e+18,
+  "total_flos": 1.58196500264072e+18,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null
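
Note: the fields in the new trainer_state.json are mutually consistent. A minimal, hypothetical Python sketch (not part of the commit) re-derives the relationship, assuming the recorded "train_batch_size" of 8 is the effective batch size per optimizer step (single process, no gradient accumulation; neither is recorded in this diff):

# Hypothetical sketch, not part of the commit. Assumes train_batch_size is the
# effective examples consumed per global step (single process, no gradient
# accumulation) -- an assumption, since the diff does not record either setting.
epoch = 0.08558895902428587   # "epoch" at "global_step": 800
global_step = 800             # "global_step"
train_batch_size = 8          # "train_batch_size"

examples_seen = global_step * train_batch_size     # 6400 under the assumptions above
implied_dataset_size = examples_seen / epoch        # ~74,776 training examples
print(f"examples seen: {examples_seen}")
print(f"implied training-set size: {implied_dataset_size:.0f}")

The same arithmetic applied to the old state (epoch 0.0641917192682144 at step 600) yields the same ~74,776-example figure, which is what makes the two snapshots consistent.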