ViswanthSai committed on
Commit b9051ba · verified · 1 Parent(s): 20d687c

Training in progress, step 400, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d7eced52aa2c423e2da98c5c68cea8565571c95d0e0bc7a8b82c0b1f02af2b8a
+oid sha256:c2e2114ed459673c5d749b3341367ae547a46e47116edf106c055bdfeeb40c75
 size 241895584
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0a0dce4fdf7d2d43ab1ea719583b4c0129f2ef348f80168ba453433f86c022c4
-size 123395444
+oid sha256:a75a00d2da06bb833cc02da4ff6c961ed115d1cbf6f02daa086895025ed79392
+size 123395956
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:88e269cd7a6a06e5a6a648ce7905663c7cc1b97fad46cefa26d6b9bdc5fd61c4
+oid sha256:bb9e13c66b191c8148e2133ca2784e9c043469b5ff2bff6f0d53e0b29915063b
 size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9bd55b3848d82967a207e0805911c79200c6adce71e3b37fd24549a718f75738
+oid sha256:c801982aae9be06d302403c1fff693e53dedf89c1d3b689ee29fedad84a96d23
 size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ae3ca1d06eff714814ac96bbe64c47bca9cc2f08c621be7ae587ea940b62b902
+oid sha256:ad56050a9daa3d98b30fc2273c514f21e48c6fc2b3fdd6f82fd1518d879c7d05
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
 {
-  "best_global_step": 200,
-  "best_metric": 0.6404113173484802,
-  "best_model_checkpoint": "outputs/checkpoint-200",
-  "epoch": 0.31520882584712373,
+  "best_global_step": 400,
+  "best_metric": 0.6293139457702637,
+  "best_model_checkpoint": "outputs/checkpoint-400",
+  "epoch": 0.6304176516942475,
   "eval_steps": 200,
-  "global_step": 200,
+  "global_step": 400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -72,6 +72,70 @@
       "eval_samples_per_second": 6.564,
       "eval_steps_per_second": 1.646,
       "step": 200
+    },
+    {
+      "epoch": 0.3546099290780142,
+      "grad_norm": 0.3037702739238739,
+      "learning_rate": 0.00019077774785329087,
+      "loss": 0.6451,
+      "step": 225
+    },
+    {
+      "epoch": 0.39401103230890466,
+      "grad_norm": 0.2634023427963257,
+      "learning_rate": 0.00018677654533689287,
+      "loss": 0.6248,
+      "step": 250
+    },
+    {
+      "epoch": 0.43341213553979513,
+      "grad_norm": 0.2869561016559601,
+      "learning_rate": 0.00018211492091337042,
+      "loss": 0.6454,
+      "step": 275
+    },
+    {
+      "epoch": 0.4728132387706856,
+      "grad_norm": 0.3140774965286255,
+      "learning_rate": 0.00017682835235935236,
+      "loss": 0.6345,
+      "step": 300
+    },
+    {
+      "epoch": 0.512214342001576,
+      "grad_norm": 0.2564923167228699,
+      "learning_rate": 0.0001709570736536521,
+      "loss": 0.6483,
+      "step": 325
+    },
+    {
+      "epoch": 0.5516154452324665,
+      "grad_norm": 0.2636582553386688,
+      "learning_rate": 0.00016454576877239507,
+      "loss": 0.656,
+      "step": 350
+    },
+    {
+      "epoch": 0.5910165484633569,
+      "grad_norm": 0.24333670735359192,
+      "learning_rate": 0.00015764323161697935,
+      "loss": 0.6371,
+      "step": 375
+    },
+    {
+      "epoch": 0.6304176516942475,
+      "grad_norm": 0.4404401481151581,
+      "learning_rate": 0.00015030199466302353,
+      "loss": 0.6426,
+      "step": 400
+    },
+    {
+      "epoch": 0.6304176516942475,
+      "eval_loss": 0.6293139457702637,
+      "eval_runtime": 162.8568,
+      "eval_samples_per_second": 6.564,
+      "eval_steps_per_second": 1.646,
+      "step": 400
     }
   ],
   "logging_steps": 25,
@@ -91,7 +155,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.3901686843572224e+16,
+  "total_flos": 4.752881759404032e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null