chchen committed on
Commit
ff11116
·
verified ·
1 Parent(s): e023cc9

End of training

Browse files
README.md CHANGED
@@ -2,9 +2,10 @@
2
  license: gemma
3
  library_name: peft
4
  tags:
 
 
5
  - trl
6
  - dpo
7
- - llama-factory
8
  - generated_from_trainer
9
  base_model: google/gemma-7b-it
10
  model-index:
@@ -17,7 +18,19 @@ should probably proofread and complete it, then remove this comment. -->
17
 
18
  # Gemma-7B-It-ORPO
19
 
20
- This model is a fine-tuned version of [google/gemma-7b-it](https://huggingface.co/google/gemma-7b-it) on an unknown dataset.
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
  ## Model description
23
 
 
2
  license: gemma
3
  library_name: peft
4
  tags:
5
+ - llama-factory
6
+ - lora
7
  - trl
8
  - dpo
 
9
  - generated_from_trainer
10
  base_model: google/gemma-7b-it
11
  model-index:
 
18
 
19
  # Gemma-7B-It-ORPO
20
 
21
+ This model is a fine-tuned version of [google/gemma-7b-it](https://huggingface.co/google/gemma-7b-it) on the dpo_mix_en dataset.
22
+ It achieves the following results on the evaluation set:
23
+ - Loss: 1.7794
24
+ - Rewards/chosen: -0.1716
25
+ - Rewards/rejected: -0.1920
26
+ - Rewards/accuracies: 0.5600
27
+ - Rewards/margins: 0.0204
28
+ - Logps/rejected: -1.9200
29
+ - Logps/chosen: -1.7164
30
+ - Logits/rejected: 236.5044
31
+ - Logits/chosen: 236.6770
32
+ - Sft Loss: 1.7164
33
+ - Odds Ratio Loss: 0.6304
34
 
35
  ## Model description
36
 
all_results.json CHANGED
@@ -1,8 +1,22 @@
1
  {
2
  "epoch": 2.986666666666667,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  "total_flos": 2.2023536924295168e+17,
4
- "train_loss": 1.8065733909606934,
5
- "train_runtime": 5653.336,
6
- "train_samples_per_second": 0.478,
7
  "train_steps_per_second": 0.03
8
  }
 
1
  {
2
  "epoch": 2.986666666666667,
3
+ "eval_logits/chosen": 236.67703247070312,
4
+ "eval_logits/rejected": 236.50440979003906,
5
+ "eval_logps/chosen": -1.7163937091827393,
6
+ "eval_logps/rejected": -1.9199703931808472,
7
+ "eval_loss": 1.7794384956359863,
8
+ "eval_odds_ratio_loss": 0.6304484009742737,
9
+ "eval_rewards/accuracies": 0.5600000023841858,
10
+ "eval_rewards/chosen": -0.1716393679380417,
11
+ "eval_rewards/margins": 0.020357677713036537,
12
+ "eval_rewards/rejected": -0.19199703633785248,
13
+ "eval_runtime": 59.9808,
14
+ "eval_samples_per_second": 1.667,
15
+ "eval_sft_loss": 1.7163937091827393,
16
+ "eval_steps_per_second": 1.667,
17
  "total_flos": 2.2023536924295168e+17,
18
+ "train_loss": 1.8747729460398357,
19
+ "train_runtime": 5642.3245,
20
+ "train_samples_per_second": 0.479,
21
  "train_steps_per_second": 0.03
22
  }
eval_results.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.986666666666667,
3
+ "eval_logits/chosen": 236.67703247070312,
4
+ "eval_logits/rejected": 236.50440979003906,
5
+ "eval_logps/chosen": -1.7163937091827393,
6
+ "eval_logps/rejected": -1.9199703931808472,
7
+ "eval_loss": 1.7794384956359863,
8
+ "eval_odds_ratio_loss": 0.6304484009742737,
9
+ "eval_rewards/accuracies": 0.5600000023841858,
10
+ "eval_rewards/chosen": -0.1716393679380417,
11
+ "eval_rewards/margins": 0.020357677713036537,
12
+ "eval_rewards/rejected": -0.19199703633785248,
13
+ "eval_runtime": 59.9808,
14
+ "eval_samples_per_second": 1.667,
15
+ "eval_sft_loss": 1.7163937091827393,
16
+ "eval_steps_per_second": 1.667
17
+ }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 2.986666666666667,
3
  "total_flos": 2.2023536924295168e+17,
4
- "train_loss": 1.8065733909606934,
5
- "train_runtime": 5653.336,
6
- "train_samples_per_second": 0.478,
7
  "train_steps_per_second": 0.03
8
  }
 
1
  {
2
  "epoch": 2.986666666666667,
3
  "total_flos": 2.2023536924295168e+17,
4
+ "train_loss": 1.8747729460398357,
5
+ "train_runtime": 5642.3245,
6
+ "train_samples_per_second": 0.479,
7
  "train_steps_per_second": 0.03
8
  }
trainer_state.json CHANGED
@@ -10,283 +10,283 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.17777777777777778,
13
- "grad_norm": 1.9894839525222778,
14
  "learning_rate": 4.957230266673969e-06,
15
- "logits/chosen": 218.2901153564453,
16
- "logits/rejected": 217.98861694335938,
17
- "logps/chosen": -2.0115113258361816,
18
- "logps/rejected": -2.2237343788146973,
19
- "loss": 2.0742,
20
- "odds_ratio_loss": 0.6265951991081238,
21
- "rewards/accuracies": 0.53125,
22
- "rewards/chosen": -0.20115113258361816,
23
- "rewards/margins": 0.021222341805696487,
24
- "rewards/rejected": -0.22237345576286316,
25
- "sft_loss": 2.0115113258361816,
26
  "step": 10
27
  },
28
  {
29
  "epoch": 0.35555555555555557,
30
- "grad_norm": 1.8634482622146606,
31
  "learning_rate": 4.828686741593921e-06,
32
- "logits/chosen": 220.6365509033203,
33
- "logits/rejected": 220.3389129638672,
34
- "logps/chosen": -2.0625388622283936,
35
- "logps/rejected": -2.3297858238220215,
36
- "loss": 2.1265,
37
- "odds_ratio_loss": 0.6394721865653992,
38
  "rewards/accuracies": 0.543749988079071,
39
- "rewards/chosen": -0.20625391602516174,
40
- "rewards/margins": 0.026724692434072495,
41
- "rewards/rejected": -0.23297858238220215,
42
- "sft_loss": 2.0625388622283936,
43
  "step": 20
44
  },
45
  {
46
  "epoch": 0.5333333333333333,
47
- "grad_norm": 1.5888192653656006,
48
  "learning_rate": 4.618852307232078e-06,
49
- "logits/chosen": 223.16909790039062,
50
- "logits/rejected": 223.3883819580078,
51
- "logps/chosen": -1.8862736225128174,
52
- "logps/rejected": -2.1588046550750732,
53
- "loss": 1.9475,
54
- "odds_ratio_loss": 0.612014651298523,
55
- "rewards/accuracies": 0.5375000238418579,
56
- "rewards/chosen": -0.18862736225128174,
57
- "rewards/margins": 0.027253109961748123,
58
- "rewards/rejected": -0.21588046848773956,
59
- "sft_loss": 1.8862736225128174,
60
  "step": 30
61
  },
62
  {
63
  "epoch": 0.7111111111111111,
64
- "grad_norm": 2.911007881164551,
65
  "learning_rate": 4.335051964269395e-06,
66
- "logits/chosen": 219.7681884765625,
67
- "logits/rejected": 220.56063842773438,
68
- "logps/chosen": -1.7726600170135498,
69
- "logps/rejected": -2.0512185096740723,
70
- "loss": 1.8335,
71
- "odds_ratio_loss": 0.6088349223136902,
72
- "rewards/accuracies": 0.550000011920929,
73
- "rewards/chosen": -0.17726600170135498,
74
- "rewards/margins": 0.02785584330558777,
75
- "rewards/rejected": -0.20512184500694275,
76
- "sft_loss": 1.7726600170135498,
77
  "step": 40
78
  },
79
  {
80
  "epoch": 0.8888888888888888,
81
- "grad_norm": 3.1844053268432617,
82
  "learning_rate": 3.987192750660719e-06,
83
- "logits/chosen": 227.5769500732422,
84
- "logits/rejected": 227.42721557617188,
85
- "logps/chosen": -1.982785940170288,
86
- "logps/rejected": -2.3187923431396484,
87
- "loss": 2.0471,
88
- "odds_ratio_loss": 0.6428849697113037,
89
  "rewards/accuracies": 0.5375000238418579,
90
- "rewards/chosen": -0.19827860593795776,
91
- "rewards/margins": 0.033600639551877975,
92
- "rewards/rejected": -0.23187923431396484,
93
- "sft_loss": 1.982785940170288,
94
  "step": 50
95
  },
96
  {
97
  "epoch": 1.0666666666666667,
98
- "grad_norm": 3.250999689102173,
99
  "learning_rate": 3.587417902020876e-06,
100
- "logits/chosen": 229.1508331298828,
101
- "logits/rejected": 230.65234375,
102
- "logps/chosen": -1.8027265071868896,
103
- "logps/rejected": -2.109091281890869,
104
- "loss": 1.862,
105
- "odds_ratio_loss": 0.5927264094352722,
106
- "rewards/accuracies": 0.5562499761581421,
107
- "rewards/chosen": -0.1802726536989212,
108
- "rewards/margins": 0.030636483803391457,
109
- "rewards/rejected": -0.2109091579914093,
110
- "sft_loss": 1.8027265071868896,
111
  "step": 60
112
  },
113
  {
114
  "epoch": 1.2444444444444445,
115
- "grad_norm": 2.524855375289917,
116
  "learning_rate": 3.1496829497545268e-06,
117
- "logits/chosen": 229.8919219970703,
118
- "logits/rejected": 229.6911163330078,
119
- "logps/chosen": -1.722979187965393,
120
- "logps/rejected": -1.955990195274353,
121
- "loss": 1.7853,
122
- "odds_ratio_loss": 0.6227248311042786,
123
- "rewards/accuracies": 0.581250011920929,
124
- "rewards/chosen": -0.17229792475700378,
125
- "rewards/margins": 0.02330111339688301,
126
- "rewards/rejected": -0.1955990493297577,
127
- "sft_loss": 1.722979187965393,
128
  "step": 70
129
  },
130
  {
131
  "epoch": 1.4222222222222223,
132
- "grad_norm": 1.4623929262161255,
133
  "learning_rate": 2.6892685546987724e-06,
134
- "logits/chosen": 234.3847198486328,
135
- "logits/rejected": 233.77871704101562,
136
- "logps/chosen": -1.7393592596054077,
137
- "logps/rejected": -1.9893379211425781,
138
- "loss": 1.8004,
139
- "odds_ratio_loss": 0.6108058094978333,
140
- "rewards/accuracies": 0.581250011920929,
141
- "rewards/chosen": -0.1739359200000763,
142
- "rewards/margins": 0.02499789372086525,
143
- "rewards/rejected": -0.19893380999565125,
144
- "sft_loss": 1.7393592596054077,
145
  "step": 80
146
  },
147
  {
148
  "epoch": 1.6,
149
- "grad_norm": 1.540860891342163,
150
  "learning_rate": 2.2222470825144806e-06,
151
- "logits/chosen": 231.958251953125,
152
- "logits/rejected": 232.3849334716797,
153
- "logps/chosen": -1.5855820178985596,
154
- "logps/rejected": -1.9024156332015991,
155
- "loss": 1.647,
156
- "odds_ratio_loss": 0.6140419244766235,
157
  "rewards/accuracies": 0.543749988079071,
158
- "rewards/chosen": -0.158558189868927,
159
- "rewards/margins": 0.03168336674571037,
160
- "rewards/rejected": -0.19024157524108887,
161
- "sft_loss": 1.5855820178985596,
162
  "step": 90
163
  },
164
  {
165
  "epoch": 1.7777777777777777,
166
- "grad_norm": 1.0507925748825073,
167
  "learning_rate": 1.7649215418673847e-06,
168
- "logits/chosen": 235.5908203125,
169
- "logits/rejected": 235.9726104736328,
170
- "logps/chosen": -1.67770254611969,
171
- "logps/rejected": -1.9119056463241577,
172
- "loss": 1.7403,
173
- "odds_ratio_loss": 0.6257372498512268,
174
- "rewards/accuracies": 0.59375,
175
- "rewards/chosen": -0.16777023673057556,
176
- "rewards/margins": 0.0234203077852726,
177
- "rewards/rejected": -0.19119055569171906,
178
- "sft_loss": 1.67770254611969,
179
  "step": 100
180
  },
181
  {
182
  "epoch": 1.9555555555555557,
183
- "grad_norm": 1.1329325437545776,
184
  "learning_rate": 1.3332564712129845e-06,
185
- "logits/chosen": 236.5535125732422,
186
- "logits/rejected": 236.4635772705078,
187
- "logps/chosen": -1.661228895187378,
188
- "logps/rejected": -1.8796217441558838,
189
- "loss": 1.7239,
190
- "odds_ratio_loss": 0.6264489889144897,
191
- "rewards/accuracies": 0.5062500238418579,
192
- "rewards/chosen": -0.16612288355827332,
193
- "rewards/margins": 0.021839287132024765,
194
- "rewards/rejected": -0.18796217441558838,
195
- "sft_loss": 1.661228895187378,
196
  "step": 110
197
  },
198
  {
199
  "epoch": 2.1333333333333333,
200
- "grad_norm": 3.1466641426086426,
201
  "learning_rate": 9.423206410612498e-07,
202
- "logits/chosen": 234.2484130859375,
203
- "logits/rejected": 235.138427734375,
204
- "logps/chosen": -1.6647857427597046,
205
- "logps/rejected": -1.900854468345642,
206
- "loss": 1.7291,
207
- "odds_ratio_loss": 0.6434910893440247,
208
  "rewards/accuracies": 0.53125,
209
- "rewards/chosen": -0.16647860407829285,
210
- "rewards/margins": 0.02360684797167778,
211
- "rewards/rejected": -0.19008544087409973,
212
- "sft_loss": 1.6647857427597046,
213
  "step": 120
214
  },
215
  {
216
  "epoch": 2.311111111111111,
217
- "grad_norm": 0.8913648128509521,
218
  "learning_rate": 6.057610261367044e-07,
219
- "logits/chosen": 234.11795043945312,
220
- "logits/rejected": 233.8062744140625,
221
- "logps/chosen": -1.560727834701538,
222
- "logps/rejected": -1.7592264413833618,
223
- "loss": 1.6236,
224
- "odds_ratio_loss": 0.6284235119819641,
225
- "rewards/accuracies": 0.5687500238418579,
226
- "rewards/chosen": -0.15607279539108276,
227
- "rewards/margins": 0.019849851727485657,
228
- "rewards/rejected": -0.17592264711856842,
229
- "sft_loss": 1.560727834701538,
230
  "step": 130
231
  },
232
  {
233
  "epoch": 2.488888888888889,
234
- "grad_norm": 1.3135228157043457,
235
  "learning_rate": 3.3532641026504415e-07,
236
- "logits/chosen": 238.02099609375,
237
- "logits/rejected": 237.72402954101562,
238
- "logps/chosen": -1.5137670040130615,
239
- "logps/rejected": -1.881291389465332,
240
- "loss": 1.5735,
241
- "odds_ratio_loss": 0.5971778035163879,
242
- "rewards/accuracies": 0.5874999761581421,
243
- "rewards/chosen": -0.15137669444084167,
244
- "rewards/margins": 0.036752425134181976,
245
- "rewards/rejected": -0.18812914192676544,
246
- "sft_loss": 1.5137670040130615,
247
  "step": 140
248
  },
249
  {
250
  "epoch": 2.6666666666666665,
251
- "grad_norm": 2.724855661392212,
252
  "learning_rate": 1.4045725421448332e-07,
253
- "logits/chosen": 238.43264770507812,
254
- "logits/rejected": 238.6967010498047,
255
- "logps/chosen": -1.7582404613494873,
256
- "logps/rejected": -2.0160341262817383,
257
- "loss": 1.8172,
258
- "odds_ratio_loss": 0.5895546674728394,
259
- "rewards/accuracies": 0.5874999761581421,
260
- "rewards/chosen": -0.17582406103610992,
261
- "rewards/margins": 0.025779366493225098,
262
- "rewards/rejected": -0.20160344243049622,
263
- "sft_loss": 1.7582404613494873,
264
  "step": 150
265
  },
266
  {
267
  "epoch": 2.8444444444444446,
268
- "grad_norm": 1.488288402557373,
269
  "learning_rate": 2.7956143581177874e-08,
270
- "logits/chosen": 237.65185546875,
271
- "logits/rejected": 237.43270874023438,
272
- "logps/chosen": -1.4948513507843018,
273
- "logps/rejected": -1.7622127532958984,
274
- "loss": 1.5534,
275
- "odds_ratio_loss": 0.5855392217636108,
276
- "rewards/accuracies": 0.612500011920929,
277
- "rewards/chosen": -0.14948514103889465,
278
- "rewards/margins": 0.026736149564385414,
279
- "rewards/rejected": -0.17622129619121552,
280
- "sft_loss": 1.4948513507843018,
281
  "step": 160
282
  },
283
  {
284
  "epoch": 2.986666666666667,
285
  "step": 168,
286
  "total_flos": 2.2023536924295168e+17,
287
- "train_loss": 1.8065733909606934,
288
- "train_runtime": 5653.336,
289
- "train_samples_per_second": 0.478,
290
  "train_steps_per_second": 0.03
291
  }
292
  ],
 
10
  "log_history": [
11
  {
12
  "epoch": 0.17777777777777778,
13
+ "grad_norm": 1.983818769454956,
14
  "learning_rate": 4.957230266673969e-06,
15
+ "logits/chosen": 217.77822875976562,
16
+ "logits/rejected": 217.4987335205078,
17
+ "logps/chosen": -2.053837299346924,
18
+ "logps/rejected": -2.262728691101074,
19
+ "loss": 2.1168,
20
+ "odds_ratio_loss": 0.6295818090438843,
21
+ "rewards/accuracies": 0.5249999761581421,
22
+ "rewards/chosen": -0.20538373291492462,
23
+ "rewards/margins": 0.020889144390821457,
24
+ "rewards/rejected": -0.22627286612987518,
25
+ "sft_loss": 2.053837299346924,
26
  "step": 10
27
  },
28
  {
29
  "epoch": 0.35555555555555557,
30
+ "grad_norm": 1.9484807252883911,
31
  "learning_rate": 4.828686741593921e-06,
32
+ "logits/chosen": 219.17361450195312,
33
+ "logits/rejected": 218.8988037109375,
34
+ "logps/chosen": -2.1305088996887207,
35
+ "logps/rejected": -2.402930736541748,
36
+ "loss": 2.1946,
37
+ "odds_ratio_loss": 0.6408200263977051,
38
  "rewards/accuracies": 0.543749988079071,
39
+ "rewards/chosen": -0.21305091679096222,
40
+ "rewards/margins": 0.027242189273238182,
41
+ "rewards/rejected": -0.24029311537742615,
42
+ "sft_loss": 2.1305088996887207,
43
  "step": 20
44
  },
45
  {
46
  "epoch": 0.5333333333333333,
47
+ "grad_norm": 1.7125110626220703,
48
  "learning_rate": 4.618852307232078e-06,
49
+ "logits/chosen": 221.2677459716797,
50
+ "logits/rejected": 221.5433807373047,
51
+ "logps/chosen": -1.9636151790618896,
52
+ "logps/rejected": -2.2476425170898438,
53
+ "loss": 2.0246,
54
+ "odds_ratio_loss": 0.6097511053085327,
55
+ "rewards/accuracies": 0.53125,
56
+ "rewards/chosen": -0.19636152684688568,
57
+ "rewards/margins": 0.028402745723724365,
58
+ "rewards/rejected": -0.22476427257061005,
59
+ "sft_loss": 1.9636151790618896,
60
  "step": 30
61
  },
62
  {
63
  "epoch": 0.7111111111111111,
64
+ "grad_norm": 3.0041399002075195,
65
  "learning_rate": 4.335051964269395e-06,
66
+ "logits/chosen": 217.79830932617188,
67
+ "logits/rejected": 218.62911987304688,
68
+ "logps/chosen": -1.8520755767822266,
69
+ "logps/rejected": -2.1365625858306885,
70
+ "loss": 1.913,
71
+ "odds_ratio_loss": 0.6088087558746338,
72
+ "rewards/accuracies": 0.5874999761581421,
73
+ "rewards/chosen": -0.1852075308561325,
74
+ "rewards/margins": 0.028448715806007385,
75
+ "rewards/rejected": -0.2136562615633011,
76
+ "sft_loss": 1.8520755767822266,
77
  "step": 40
78
  },
79
  {
80
  "epoch": 0.8888888888888888,
81
+ "grad_norm": 3.342698335647583,
82
  "learning_rate": 3.987192750660719e-06,
83
+ "logits/chosen": 225.99374389648438,
84
+ "logits/rejected": 225.8725128173828,
85
+ "logps/chosen": -2.055860996246338,
86
+ "logps/rejected": -2.403578996658325,
87
+ "loss": 2.12,
88
+ "odds_ratio_loss": 0.6415398716926575,
89
  "rewards/accuracies": 0.5375000238418579,
90
+ "rewards/chosen": -0.20558610558509827,
91
+ "rewards/margins": 0.03477178141474724,
92
+ "rewards/rejected": -0.240357905626297,
93
+ "sft_loss": 2.055860996246338,
94
  "step": 50
95
  },
96
  {
97
  "epoch": 1.0666666666666667,
98
+ "grad_norm": 3.5346407890319824,
99
  "learning_rate": 3.587417902020876e-06,
100
+ "logits/chosen": 227.7618408203125,
101
+ "logits/rejected": 229.3314971923828,
102
+ "logps/chosen": -1.8825321197509766,
103
+ "logps/rejected": -2.1969597339630127,
104
+ "loss": 1.9418,
105
+ "odds_ratio_loss": 0.5927931070327759,
106
+ "rewards/accuracies": 0.5625,
107
+ "rewards/chosen": -0.1882532387971878,
108
+ "rewards/margins": 0.03144273906946182,
109
+ "rewards/rejected": -0.21969597041606903,
110
+ "sft_loss": 1.8825321197509766,
111
  "step": 60
112
  },
113
  {
114
  "epoch": 1.2444444444444445,
115
+ "grad_norm": 2.7227234840393066,
116
  "learning_rate": 3.1496829497545268e-06,
117
+ "logits/chosen": 228.9124298095703,
118
+ "logits/rejected": 228.7540740966797,
119
+ "logps/chosen": -1.7887885570526123,
120
+ "logps/rejected": -2.0250189304351807,
121
+ "loss": 1.8513,
122
+ "odds_ratio_loss": 0.6251059770584106,
123
+ "rewards/accuracies": 0.5625,
124
+ "rewards/chosen": -0.17887887358665466,
125
+ "rewards/margins": 0.023623019456863403,
126
+ "rewards/rejected": -0.20250189304351807,
127
+ "sft_loss": 1.7887885570526123,
128
  "step": 70
129
  },
130
  {
131
  "epoch": 1.4222222222222223,
132
+ "grad_norm": 1.4925893545150757,
133
  "learning_rate": 2.6892685546987724e-06,
134
+ "logits/chosen": 233.6576385498047,
135
+ "logits/rejected": 233.0616455078125,
136
+ "logps/chosen": -1.8144474029541016,
137
+ "logps/rejected": -2.061004638671875,
138
+ "loss": 1.8758,
139
+ "odds_ratio_loss": 0.6132601499557495,
140
+ "rewards/accuracies": 0.574999988079071,
141
+ "rewards/chosen": -0.18144474923610687,
142
+ "rewards/margins": 0.024655740708112717,
143
+ "rewards/rejected": -0.2061004638671875,
144
+ "sft_loss": 1.8144474029541016,
145
  "step": 80
146
  },
147
  {
148
  "epoch": 1.6,
149
+ "grad_norm": 1.5339916944503784,
150
  "learning_rate": 2.2222470825144806e-06,
151
+ "logits/chosen": 231.31655883789062,
152
+ "logits/rejected": 231.7608184814453,
153
+ "logps/chosen": -1.6470849514007568,
154
+ "logps/rejected": -1.9702856540679932,
155
+ "loss": 1.7086,
156
+ "odds_ratio_loss": 0.6146546602249146,
157
  "rewards/accuracies": 0.543749988079071,
158
+ "rewards/chosen": -0.16470849514007568,
159
+ "rewards/margins": 0.0323200486600399,
160
+ "rewards/rejected": -0.19702854752540588,
161
+ "sft_loss": 1.6470849514007568,
162
  "step": 90
163
  },
164
  {
165
  "epoch": 1.7777777777777777,
166
+ "grad_norm": 1.0663031339645386,
167
  "learning_rate": 1.7649215418673847e-06,
168
+ "logits/chosen": 235.10598754882812,
169
+ "logits/rejected": 235.50381469726562,
170
+ "logps/chosen": -1.750628113746643,
171
+ "logps/rejected": -1.9882148504257202,
172
+ "loss": 1.8133,
173
+ "odds_ratio_loss": 0.626555323600769,
174
+ "rewards/accuracies": 0.5625,
175
+ "rewards/chosen": -0.17506280541419983,
176
+ "rewards/margins": 0.023758674040436745,
177
+ "rewards/rejected": -0.19882148504257202,
178
+ "sft_loss": 1.750628113746643,
179
  "step": 100
180
  },
181
  {
182
  "epoch": 1.9555555555555557,
183
+ "grad_norm": 1.1423336267471313,
184
  "learning_rate": 1.3332564712129845e-06,
185
+ "logits/chosen": 236.2017059326172,
186
+ "logits/rejected": 236.12319946289062,
187
+ "logps/chosen": -1.7247778177261353,
188
+ "logps/rejected": -1.9412933588027954,
189
+ "loss": 1.7876,
190
+ "odds_ratio_loss": 0.6279042363166809,
191
+ "rewards/accuracies": 0.518750011920929,
192
+ "rewards/chosen": -0.17247776687145233,
193
+ "rewards/margins": 0.02165156602859497,
194
+ "rewards/rejected": -0.1941293478012085,
195
+ "sft_loss": 1.7247778177261353,
196
  "step": 110
197
  },
198
  {
199
  "epoch": 2.1333333333333333,
200
+ "grad_norm": 2.996070623397827,
201
  "learning_rate": 9.423206410612498e-07,
202
+ "logits/chosen": 233.90518188476562,
203
+ "logits/rejected": 234.82778930664062,
204
+ "logps/chosen": -1.7220882177352905,
205
+ "logps/rejected": -1.962699294090271,
206
+ "loss": 1.7863,
207
+ "odds_ratio_loss": 0.6424781084060669,
208
  "rewards/accuracies": 0.53125,
209
+ "rewards/chosen": -0.17220883071422577,
210
+ "rewards/margins": 0.024061108008027077,
211
+ "rewards/rejected": -0.1962699443101883,
212
+ "sft_loss": 1.7220882177352905,
213
  "step": 120
214
  },
215
  {
216
  "epoch": 2.311111111111111,
217
+ "grad_norm": 0.8977892994880676,
218
  "learning_rate": 6.057610261367044e-07,
219
+ "logits/chosen": 233.8168487548828,
220
+ "logits/rejected": 233.524658203125,
221
+ "logps/chosen": -1.6289135217666626,
222
+ "logps/rejected": -1.8276309967041016,
223
+ "loss": 1.692,
224
+ "odds_ratio_loss": 0.630408525466919,
225
+ "rewards/accuracies": 0.5625,
226
+ "rewards/chosen": -0.16289135813713074,
227
+ "rewards/margins": 0.019871745258569717,
228
+ "rewards/rejected": -0.18276306986808777,
229
+ "sft_loss": 1.6289135217666626,
230
  "step": 130
231
  },
232
  {
233
  "epoch": 2.488888888888889,
234
+ "grad_norm": 1.3126909732818604,
235
  "learning_rate": 3.3532641026504415e-07,
236
+ "logits/chosen": 237.79580688476562,
237
+ "logits/rejected": 237.5025634765625,
238
+ "logps/chosen": -1.577689528465271,
239
+ "logps/rejected": -1.9474560022354126,
240
+ "loss": 1.6376,
241
+ "odds_ratio_loss": 0.598879337310791,
242
+ "rewards/accuracies": 0.606249988079071,
243
+ "rewards/chosen": -0.15776896476745605,
244
+ "rewards/margins": 0.0369766540825367,
245
+ "rewards/rejected": -0.19474558532238007,
246
+ "sft_loss": 1.577689528465271,
247
  "step": 140
248
  },
249
  {
250
  "epoch": 2.6666666666666665,
251
+ "grad_norm": 2.6702768802642822,
252
  "learning_rate": 1.4045725421448332e-07,
253
+ "logits/chosen": 238.22622680664062,
254
+ "logits/rejected": 238.50765991210938,
255
+ "logps/chosen": -1.829874038696289,
256
+ "logps/rejected": -2.089751958847046,
257
+ "loss": 1.8891,
258
+ "odds_ratio_loss": 0.5918877720832825,
259
+ "rewards/accuracies": 0.574999988079071,
260
+ "rewards/chosen": -0.18298740684986115,
261
+ "rewards/margins": 0.02598779834806919,
262
+ "rewards/rejected": -0.2089751958847046,
263
+ "sft_loss": 1.829874038696289,
264
  "step": 150
265
  },
266
  {
267
  "epoch": 2.8444444444444446,
268
+ "grad_norm": 1.5213804244995117,
269
  "learning_rate": 2.7956143581177874e-08,
270
+ "logits/chosen": 237.4580078125,
271
+ "logits/rejected": 237.25192260742188,
272
+ "logps/chosen": -1.562623143196106,
273
+ "logps/rejected": -1.8317865133285522,
274
+ "loss": 1.6214,
275
+ "odds_ratio_loss": 0.587682843208313,
276
+ "rewards/accuracies": 0.6000000238418579,
277
+ "rewards/chosen": -0.15626230835914612,
278
+ "rewards/margins": 0.026916349306702614,
279
+ "rewards/rejected": -0.18317866325378418,
280
+ "sft_loss": 1.562623143196106,
281
  "step": 160
282
  },
283
  {
284
  "epoch": 2.986666666666667,
285
  "step": 168,
286
  "total_flos": 2.2023536924295168e+17,
287
+ "train_loss": 1.8747729460398357,
288
+ "train_runtime": 5642.3245,
289
+ "train_samples_per_second": 0.479,
290
  "train_steps_per_second": 0.03
291
  }
292
  ],
training_loss.png CHANGED
training_rewards_accuracies.png CHANGED
training_sft_loss.png CHANGED