Commit 65abb0d (verified), committed by silviasapora
Parent(s): babbc0b

Model save

Files changed (4):
  1. README.md +8 -11
  2. all_results.json +5 -5
  3. train_results.json +5 -5
  4. trainer_state.json +189 -188
README.md CHANGED
@@ -1,19 +1,16 @@
 ---
-datasets:
-- argilla/dpo-mix-7k
 library_name: transformers
-model_name: /home/silvias/alignment-handbook/data/mistral-7b-sft-basic-5e-5-000-v132-full
+model_name: mistral-7b-sft-simpo-basic-5e-7-005-v142
 tags:
 - generated_from_trainer
-- alignment-handbook
 - trl
 - orpo
 licence: license
 ---
 
-# Model Card for /home/silvias/alignment-handbook/data/mistral-7b-sft-basic-5e-5-000-v132-full
+# Model Card for mistral-7b-sft-simpo-basic-5e-7-005-v142
 
-This model is a fine-tuned version of [None](https://huggingface.co/None) on the [['argilla/dpo-mix-7k']](https://huggingface.co/datasets/['argilla/dpo-mix-7k']) dataset.
+This model is a fine-tuned version of [None](https://huggingface.co/None).
 It has been trained using [TRL](https://github.com/huggingface/trl).
 
 ## Quick start
@@ -29,7 +26,7 @@ print(output["generated_text"])
 
 ## Training procedure
 
-[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/silvias/huggingface/runs/w11kzsbc)
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/silvias/huggingface/runs/acoqxadt)
 
 
 This model was trained with ORPO, a method introduced in [ORPO: Monolithic Preference Optimization without Reference Model](https://huggingface.co/papers/2403.07691).
@@ -37,10 +34,10 @@ This model was trained with ORPO, a method introduced in [ORPO: Monolithic Prefe
 ### Framework versions
 
 - TRL: 0.15.2
-- Transformers: 4.49.0
-- Pytorch: 2.5.1
-- Datasets: 3.2.0
-- Tokenizers: 0.21.1
+- Transformers: 4.51.3
+- Pytorch: 2.4.0
+- Datasets: 3.0.0
+- Tokenizers: 0.21.0
 
 ## Citations
 
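The card states the model was trained with ORPO using TRL 0.15.2. Below is a minimal sketch of such a run, not the author's actual training script: the base checkpoint, dataset, and batch size are placeholders (the dpo-mix-7k reference comes from the previous card revision and was removed in this commit), and learning_rate=5e-7 / beta=0.05 are inferred from the run name rather than read from a saved config.

```python
# Minimal sketch of an ORPO run with TRL, assuming TRL 0.15's ORPOTrainer / ORPOConfig API.
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer
from trl import ORPOConfig, ORPOTrainer

base_model = "path/to/mistral-7b-sft-checkpoint"  # hypothetical SFT starting point
model = AutoModelForCausalLM.from_pretrained(base_model)
tokenizer = AutoTokenizer.from_pretrained(base_model)

# Preference dataset with "prompt"/"chosen"/"rejected" columns; dpo-mix-7k is the dataset
# referenced in the previous card revision and may not be what this run actually used.
train_dataset = load_dataset("argilla/dpo-mix-7k", split="train")

args = ORPOConfig(
    output_dir="mistral-7b-sft-simpo-basic-5e-7-005-v142",
    learning_rate=5e-7,              # "5e-7" in the run name
    beta=0.05,                       # "005" in the run name
    num_train_epochs=1,
    per_device_train_batch_size=4,   # placeholder
    report_to="wandb",
)

trainer = ORPOTrainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    processing_class=tokenizer,
)
trainer.train()
trainer.save_model(args.output_dir)
```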
all_results.json CHANGED
@@ -1,9 +1,9 @@
 {
-    "epoch": 0.9884169884169884,
+    "epoch": 0.9879931389365352,
     "total_flos": 0.0,
-    "train_loss": 21.922766897413467,
-    "train_runtime": 1196.0193,
+    "train_loss": 9.270023425420126,
+    "train_runtime": 759.6158,
     "train_samples": 4662,
-    "train_samples_per_second": 3.898,
-    "train_steps_per_second": 0.06
+    "train_samples_per_second": 6.137,
+    "train_steps_per_second": 0.095
 }
train_results.json CHANGED
@@ -1,9 +1,9 @@
 {
-    "epoch": 0.9884169884169884,
+    "epoch": 0.9879931389365352,
     "total_flos": 0.0,
-    "train_loss": 21.922766897413467,
-    "train_runtime": 1196.0193,
+    "train_loss": 9.270023425420126,
+    "train_runtime": 759.6158,
     "train_samples": 4662,
-    "train_samples_per_second": 3.898,
-    "train_steps_per_second": 0.06
+    "train_samples_per_second": 6.137,
+    "train_steps_per_second": 0.095
 }
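The throughput fields in both results files follow directly from the sample count, the step count (global_step = 72 in trainer_state.json), and the runtime. A quick arithmetic check of the new values:

```python
# Consistency check of the reported throughput (values copied from this commit).
train_samples = 4662
global_step = 72            # from trainer_state.json
train_runtime = 759.6158    # seconds, new run

print(round(train_samples / train_runtime, 3))  # 6.137  -> "train_samples_per_second"
print(round(global_step / train_runtime, 3))    # 0.095  -> "train_steps_per_second"

# The previous run obeys the same relation: 4662 / 1196.0193 ≈ 3.898 and 72 / 1196.0193 ≈ 0.06.
```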
trainer_state.json CHANGED
@@ -1,7 +1,8 @@
 {
+  "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.9884169884169884,
+  "epoch": 0.9879931389365352,
   "eval_steps": 500,
   "global_step": 72,
   "is_hyper_param_search": false,
@@ -9,237 +10,237 @@
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 0.06864006864006864,
-      "grad_norm": 2.234375,
-      "learning_rate": 3.1249999999999997e-07,
-      "log_odds_chosen": 0.4900525212287903,
-      "log_odds_ratio": -0.5972136855125427,
-      "logps/chosen": -0.5807362794876099,
-      "logps/rejected": -0.7729060053825378,
-      "loss": 22.0294,
-      "nll_loss": 0.8785218000411987,
-      "rewards/accuracies": 0.653124988079071,
-      "rewards/chosen": -0.029036814346909523,
-      "rewards/margins": 0.009608486667275429,
-      "rewards/rejected": -0.03864530101418495,
+      "epoch": 0.0686106346483705,
+      "grad_norm": 19.247772216796875,
+      "learning_rate": 2.5e-07,
+      "log_odds_chosen": 1.202063798904419,
+      "log_odds_ratio": -0.3932521939277649,
+      "logps/chosen": -0.4192759096622467,
+      "logps/rejected": -0.8378359079360962,
+      "loss": 11.1199,
+      "nll_loss": 0.6684929728507996,
+      "rewards/accuracies": 0.828125,
+      "rewards/chosen": -0.020963797345757484,
+      "rewards/margins": 0.020928001031279564,
+      "rewards/rejected": -0.04189179837703705,
       "step": 5
     },
     {
-      "epoch": 0.13728013728013727,
-      "grad_norm": 2.0625,
-      "learning_rate": 4.987961816680492e-07,
-      "log_odds_chosen": 0.6504672169685364,
-      "log_odds_ratio": -0.5491082668304443,
-      "logps/chosen": -0.5200858116149902,
-      "logps/rejected": -0.7607679963111877,
-      "loss": 21.9908,
-      "nll_loss": 0.8031341433525085,
-      "rewards/accuracies": 0.690625011920929,
-      "rewards/chosen": -0.02600429579615593,
-      "rewards/margins": 0.012034102343022823,
-      "rewards/rejected": -0.03803839534521103,
+      "epoch": 0.137221269296741,
+      "grad_norm": 16.6769962310791,
+      "learning_rate": 4.996988640512931e-07,
+      "log_odds_chosen": 1.2801547050476074,
+      "log_odds_ratio": -0.3870637118816376,
+      "logps/chosen": -0.39408302307128906,
+      "logps/rejected": -0.8230869174003601,
+      "loss": 10.5716,
+      "nll_loss": 0.6343531608581543,
+      "rewards/accuracies": 0.84375,
+      "rewards/chosen": -0.019704151898622513,
+      "rewards/margins": 0.021450195461511612,
+      "rewards/rejected": -0.041154347360134125,
       "step": 10
     },
     {
-      "epoch": 0.2059202059202059,
-      "grad_norm": 2.234375,
-      "learning_rate": 4.853860162957551e-07,
-      "log_odds_chosen": 0.8738743662834167,
-      "log_odds_ratio": -0.4701429009437561,
-      "logps/chosen": -0.45837029814720154,
-      "logps/rejected": -0.7923842668533325,
-      "loss": 21.9168,
-      "nll_loss": 0.7228935956954956,
-      "rewards/accuracies": 0.7875000238418579,
-      "rewards/chosen": -0.022918514907360077,
-      "rewards/margins": 0.01670069992542267,
-      "rewards/rejected": -0.039619214832782745,
+      "epoch": 0.2058319039451115,
+      "grad_norm": 18.085586547851562,
+      "learning_rate": 4.892350839330522e-07,
+      "log_odds_chosen": 1.465559959411621,
+      "log_odds_ratio": -0.3448963165283203,
+      "logps/chosen": -0.3543047606945038,
+      "logps/rejected": -0.8525373339653015,
+      "loss": 9.6005,
+      "nll_loss": 0.5749582052230835,
+      "rewards/accuracies": 0.878125011920929,
+      "rewards/chosen": -0.01771523989737034,
+      "rewards/margins": 0.024911630898714066,
+      "rewards/rejected": -0.04262687265872955,
       "step": 15
     },
     {
-      "epoch": 0.27456027456027454,
-      "grad_norm": 2.625,
-      "learning_rate": 4.578674030756363e-07,
-      "log_odds_chosen": 0.7947912812232971,
-      "log_odds_ratio": -0.5199654698371887,
-      "logps/chosen": -0.5325753688812256,
-      "logps/rejected": -0.8283828496932983,
-      "loss": 21.9475,
-      "nll_loss": 0.7589122653007507,
-      "rewards/accuracies": 0.71875,
-      "rewards/chosen": -0.02662876807153225,
-      "rewards/margins": 0.014790371060371399,
-      "rewards/rejected": -0.0414191372692585,
+      "epoch": 0.274442538593482,
+      "grad_norm": 15.663309097290039,
+      "learning_rate": 4.64432152500068e-07,
+      "log_odds_chosen": 1.7299188375473022,
+      "log_odds_ratio": -0.30832645297050476,
+      "logps/chosen": -0.3413788974285126,
+      "logps/rejected": -0.897726833820343,
+      "loss": 8.7406,
+      "nll_loss": 0.5222011804580688,
+      "rewards/accuracies": 0.887499988079071,
+      "rewards/chosen": -0.017068946734070778,
+      "rewards/margins": 0.027817392721772194,
+      "rewards/rejected": -0.04488633945584297,
       "step": 20
     },
     {
-      "epoch": 0.3432003432003432,
-      "grad_norm": 2.359375,
-      "learning_rate": 4.178897387117546e-07,
-      "log_odds_chosen": 0.7632033228874207,
-      "log_odds_ratio": -0.48927417397499084,
-      "logps/chosen": -0.477088987827301,
-      "logps/rejected": -0.761306881904602,
-      "loss": 21.9558,
-      "nll_loss": 0.7712680697441101,
-      "rewards/accuracies": 0.7906249761581421,
-      "rewards/chosen": -0.023854447528719902,
-      "rewards/margins": 0.014210891909897327,
-      "rewards/rejected": -0.038065336644649506,
+      "epoch": 0.34305317324185247,
+      "grad_norm": 18.158308029174805,
+      "learning_rate": 4.2677669529663686e-07,
+      "log_odds_chosen": 1.4837188720703125,
+      "log_odds_ratio": -0.3287079334259033,
+      "logps/chosen": -0.3441501259803772,
+      "logps/rejected": -0.8242276906967163,
+      "loss": 9.7102,
+      "nll_loss": 0.5818053483963013,
+      "rewards/accuracies": 0.8968750238418579,
+      "rewards/chosen": -0.01720750704407692,
+      "rewards/margins": 0.024003881961107254,
+      "rewards/rejected": -0.041211389005184174,
       "step": 25
     },
     {
-      "epoch": 0.4118404118404118,
-      "grad_norm": 2.75,
-      "learning_rate": 3.6784918420649944e-07,
-      "log_odds_chosen": 0.905913233757019,
-      "log_odds_ratio": -0.4420197904109955,
-      "logps/chosen": -0.4566729664802551,
-      "logps/rejected": -0.8100153207778931,
-      "loss": 21.9012,
-      "nll_loss": 0.6997581720352173,
-      "rewards/accuracies": 0.793749988079071,
-      "rewards/chosen": -0.022833649069070816,
-      "rewards/margins": 0.017667118459939957,
-      "rewards/rejected": -0.04050076752901077,
+      "epoch": 0.411663807890223,
+      "grad_norm": 16.503480911254883,
+      "learning_rate": 3.7852568604830535e-07,
+      "log_odds_chosen": 1.5456057786941528,
+      "log_odds_ratio": -0.3088548183441162,
+      "logps/chosen": -0.3460735082626343,
+      "logps/rejected": -0.8717023134231567,
+      "loss": 9.158,
+      "nll_loss": 0.5480188727378845,
+      "rewards/accuracies": 0.887499988079071,
+      "rewards/chosen": -0.017303675413131714,
+      "rewards/margins": 0.026281436905264854,
+      "rewards/rejected": -0.04358511418104172,
       "step": 30
     },
     {
-      "epoch": 0.4804804804804805,
-      "grad_norm": 2.65625,
-      "learning_rate": 3.10745044975816e-07,
-      "log_odds_chosen": 0.9323528409004211,
-      "log_odds_ratio": -0.44655704498291016,
-      "logps/chosen": -0.44178277254104614,
-      "logps/rejected": -0.7855373024940491,
-      "loss": 21.9085,
-      "nll_loss": 0.7086659669876099,
-      "rewards/accuracies": 0.796875,
-      "rewards/chosen": -0.022089136764407158,
-      "rewards/margins": 0.017187729477882385,
-      "rewards/rejected": -0.039276864379644394,
+      "epoch": 0.48027444253859347,
+      "grad_norm": 16.41570472717285,
+      "learning_rate": 3.2257116931361555e-07,
+      "log_odds_chosen": 1.6255455017089844,
+      "log_odds_ratio": -0.2997768521308899,
+      "logps/chosen": -0.3172294795513153,
+      "logps/rejected": -0.8485333323478699,
+      "loss": 8.8971,
+      "nll_loss": 0.531952977180481,
+      "rewards/accuracies": 0.903124988079071,
+      "rewards/chosen": -0.015861475840210915,
+      "rewards/margins": 0.026565194129943848,
+      "rewards/rejected": -0.04242666810750961,
       "step": 35
     },
     {
-      "epoch": 0.5491205491205491,
-      "grad_norm": 2.875,
-      "learning_rate": 2.5e-07,
-      "log_odds_chosen": 1.0263824462890625,
-      "log_odds_ratio": -0.4294084906578064,
-      "logps/chosen": -0.4194498062133789,
-      "logps/rejected": -0.791568398475647,
-      "loss": 21.8864,
-      "nll_loss": 0.701680064201355,
-      "rewards/accuracies": 0.796875,
-      "rewards/chosen": -0.020972490310668945,
-      "rewards/margins": 0.018605932593345642,
-      "rewards/rejected": -0.03957842290401459,
+      "epoch": 0.548885077186964,
+      "grad_norm": 16.295194625854492,
+      "learning_rate": 2.6226691858185454e-07,
+      "log_odds_chosen": 1.7775678634643555,
+      "log_odds_ratio": -0.2819564938545227,
+      "logps/chosen": -0.29198122024536133,
+      "logps/rejected": -0.8509271740913391,
+      "loss": 8.6232,
+      "nll_loss": 0.5151973962783813,
+      "rewards/accuracies": 0.9125000238418579,
+      "rewards/chosen": -0.014599060639739037,
+      "rewards/margins": 0.027947301045060158,
+      "rewards/rejected": -0.042546361684799194,
       "step": 40
     },
     {
-      "epoch": 0.6177606177606177,
-      "grad_norm": 2.640625,
-      "learning_rate": 1.8925495502418404e-07,
-      "log_odds_chosen": 0.9927960634231567,
-      "log_odds_ratio": -0.45052170753479004,
-      "logps/chosen": -0.44455814361572266,
-      "logps/rejected": -0.800442099571228,
-      "loss": 21.8996,
-      "nll_loss": 0.7001025080680847,
-      "rewards/accuracies": 0.778124988079071,
-      "rewards/chosen": -0.022227909415960312,
-      "rewards/margins": 0.01779419369995594,
-      "rewards/rejected": -0.0400221012532711,
+      "epoch": 0.6174957118353345,
+      "grad_norm": 18.461917877197266,
+      "learning_rate": 2.0122741949596793e-07,
+      "log_odds_chosen": 1.683835744857788,
+      "log_odds_ratio": -0.30903160572052,
+      "logps/chosen": -0.31781280040740967,
+      "logps/rejected": -0.8516524434089661,
+      "loss": 8.6307,
+      "nll_loss": 0.5150163769721985,
+      "rewards/accuracies": 0.8812500238418579,
+      "rewards/chosen": -0.015890639275312424,
+      "rewards/margins": 0.02669198252260685,
+      "rewards/rejected": -0.04258262366056442,
       "step": 45
     },
     {
-      "epoch": 0.6864006864006864,
-      "grad_norm": 2.703125,
-      "learning_rate": 1.3215081579350056e-07,
-      "log_odds_chosen": 1.010258674621582,
-      "log_odds_ratio": -0.43662938475608826,
-      "logps/chosen": -0.45176443457603455,
-      "logps/rejected": -0.8273895978927612,
-      "loss": 21.8839,
-      "nll_loss": 0.7259224653244019,
-      "rewards/accuracies": 0.793749988079071,
-      "rewards/chosen": -0.022588221356272697,
-      "rewards/margins": 0.018781261518597603,
-      "rewards/rejected": -0.04136947914958,
+      "epoch": 0.6861063464837049,
+      "grad_norm": 17.289121627807617,
+      "learning_rate": 1.4311122664242953e-07,
+      "log_odds_chosen": 1.7723395824432373,
+      "log_odds_ratio": -0.28731250762939453,
+      "logps/chosen": -0.31122511625289917,
+      "logps/rejected": -0.876534640789032,
+      "loss": 8.8422,
+      "nll_loss": 0.5288792848587036,
+      "rewards/accuracies": 0.8999999761581421,
+      "rewards/chosen": -0.015561257489025593,
+      "rewards/margins": 0.02826547622680664,
+      "rewards/rejected": -0.04382672905921936,
       "step": 50
     },
     {
-      "epoch": 0.7550407550407551,
-      "grad_norm": 2.546875,
-      "learning_rate": 8.211026128824538e-08,
-      "log_odds_chosen": 0.9382259249687195,
-      "log_odds_ratio": -0.4488070011138916,
-      "logps/chosen": -0.4725814759731293,
-      "logps/rejected": -0.8342872858047485,
-      "loss": 21.8959,
-      "nll_loss": 0.7326583862304688,
-      "rewards/accuracies": 0.7875000238418579,
-      "rewards/chosen": -0.023629074916243553,
-      "rewards/margins": 0.018085282295942307,
-      "rewards/rejected": -0.04171435907483101,
+      "epoch": 0.7547169811320755,
+      "grad_norm": 17.134496688842773,
+      "learning_rate": 9.140167895908865e-08,
+      "log_odds_chosen": 1.7291252613067627,
+      "log_odds_ratio": -0.28900426626205444,
+      "logps/chosen": -0.3234766721725464,
+      "logps/rejected": -0.8822552561759949,
+      "loss": 8.9071,
+      "nll_loss": 0.5326792001724243,
+      "rewards/accuracies": 0.909375011920929,
+      "rewards/chosen": -0.0161738358438015,
+      "rewards/margins": 0.027938928455114365,
+      "rewards/rejected": -0.04411276429891586,
       "step": 55
     },
     {
-      "epoch": 0.8236808236808236,
-      "grad_norm": 2.65625,
-      "learning_rate": 4.213259692436366e-08,
-      "log_odds_chosen": 0.9388996958732605,
-      "log_odds_ratio": -0.4496310353279114,
-      "logps/chosen": -0.48969897627830505,
-      "logps/rejected": -0.8507472276687622,
-      "loss": 21.8952,
-      "nll_loss": 0.7445230484008789,
-      "rewards/accuracies": 0.796875,
-      "rewards/chosen": -0.024484951049089432,
-      "rewards/margins": 0.018052412196993828,
-      "rewards/rejected": -0.04253736510872841,
+      "epoch": 0.823327615780446,
+      "grad_norm": 16.877124786376953,
+      "learning_rate": 4.919811712983879e-08,
+      "log_odds_chosen": 1.698340654373169,
+      "log_odds_ratio": -0.29392507672309875,
+      "logps/chosen": -0.3388321101665497,
+      "logps/rejected": -0.8975754976272583,
+      "loss": 9.0244,
+      "nll_loss": 0.5402450561523438,
+      "rewards/accuracies": 0.893750011920929,
+      "rewards/chosen": -0.016941606998443604,
+      "rewards/margins": 0.02793716825544834,
+      "rewards/rejected": -0.044878773391246796,
       "step": 60
     },
     {
-      "epoch": 0.8923208923208923,
-      "grad_norm": 2.8125,
-      "learning_rate": 1.4613983704244825e-08,
-      "log_odds_chosen": 0.9500824809074402,
-      "log_odds_ratio": -0.4538024067878723,
-      "logps/chosen": -0.47038406133651733,
-      "logps/rejected": -0.8408064842224121,
-      "loss": 21.8882,
-      "nll_loss": 0.7523521184921265,
-      "rewards/accuracies": 0.796875,
-      "rewards/chosen": -0.023519206792116165,
-      "rewards/margins": 0.01852111890912056,
-      "rewards/rejected": -0.042040325701236725,
+      "epoch": 0.8919382504288165,
+      "grad_norm": 18.37203598022461,
+      "learning_rate": 1.9030116872178314e-08,
+      "log_odds_chosen": 1.82688307762146,
+      "log_odds_ratio": -0.2703757882118225,
+      "logps/chosen": -0.30324336886405945,
+      "logps/rejected": -0.8961852788925171,
+      "loss": 8.723,
+      "nll_loss": 0.5218873023986816,
+      "rewards/accuracies": 0.9156249761581421,
+      "rewards/chosen": -0.015162169933319092,
+      "rewards/margins": 0.029647093266248703,
+      "rewards/rejected": -0.044809263199567795,
       "step": 65
     },
     {
-      "epoch": 0.960960960960961,
-      "grad_norm": 2.78125,
-      "learning_rate": 1.2038183319507956e-09,
-      "log_odds_chosen": 0.9230395555496216,
-      "log_odds_ratio": -0.45713886618614197,
-      "logps/chosen": -0.4812949299812317,
-      "logps/rejected": -0.8133236169815063,
-      "loss": 21.918,
-      "nll_loss": 0.7658084034919739,
-      "rewards/accuracies": 0.793749988079071,
-      "rewards/chosen": -0.024064745754003525,
-      "rewards/margins": 0.016601432114839554,
-      "rewards/rejected": -0.04066618159413338,
+      "epoch": 0.9605488850771869,
+      "grad_norm": 17.195167541503906,
+      "learning_rate": 2.7058725088047464e-09,
+      "log_odds_chosen": 1.7199735641479492,
+      "log_odds_ratio": -0.28608742356300354,
+      "logps/chosen": -0.32787787914276123,
+      "logps/rejected": -0.8620197176933289,
+      "loss": 9.1848,
+      "nll_loss": 0.5500085949897766,
+      "rewards/accuracies": 0.918749988079071,
+      "rewards/chosen": -0.01639389432966709,
+      "rewards/margins": 0.02670709416270256,
+      "rewards/rejected": -0.0431009940803051,
       "step": 70
     },
     {
-      "epoch": 0.9884169884169884,
+      "epoch": 0.9879931389365352,
       "step": 72,
       "total_flos": 0.0,
-      "train_loss": 21.922766897413467,
-      "train_runtime": 1196.0193,
-      "train_samples_per_second": 3.898,
-      "train_steps_per_second": 0.06
+      "train_loss": 9.270023425420126,
+      "train_runtime": 759.6158,
+      "train_samples_per_second": 6.137,
+      "train_steps_per_second": 0.095
     }
   ],
   "logging_steps": 5,