jbjeong91 committed
Commit a45c9e1 · verified · 1 Parent(s): 334fee2

Model save
README.md ADDED
@@ -0,0 +1,77 @@
+ ---
+ library_name: transformers
+ license: llama3.1
+ base_model: meta-llama/Meta-Llama-3.1-8B-Instruct
+ tags:
+ - trl
+ - cpo
+ - generated_from_trainer
+ model-index:
+ - name: llama3.1-cpo-full
+ results: []
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ # llama3.1-cpo-full
+
+ This model is a fine-tuned version of [meta-llama/Meta-Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct) on an unknown dataset.
+ It achieves the following results on the evaluation set:
+ - Loss: 1.6704
+ - Rewards/chosen: -15.1684
+ - Rewards/rejected: -16.0433
+ - Rewards/accuracies: 0.6293
+ - Rewards/margins: 0.8749
+ - Logps/rejected: -160.4333
+ - Logps/chosen: -151.6842
+ - Logits/rejected: -0.3369
+ - Logits/chosen: -0.3497
+ - Nll Loss: 0.4209
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 5e-07
+ - train_batch_size: 8
+ - eval_batch_size: 8
+ - seed: 42
+ - distributed_type: multi-GPU
+ - num_devices: 8
+ - gradient_accumulation_steps: 8
+ - total_train_batch_size: 512
+ - total_eval_batch_size: 64
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+ - lr_scheduler_type: linear
+ - lr_scheduler_warmup_ratio: 0.1
+ - num_epochs: 3
+
+ ### Training results
+
+ | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen | Nll Loss |
+ |:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|:--------:|
+ | 1.822 | 0.9238 | 100 | 1.7791 | -14.6496 | -15.4269 | 0.6034 | 0.7773 | -154.2694 | -146.4961 | -0.4235 | -0.4380 | 0.4058 |
+ | 1.5612 | 1.8476 | 200 | 1.6871 | -15.1337 | -15.9726 | 0.6379 | 0.8389 | -159.7256 | -151.3367 | -0.3722 | -0.3863 | 0.4197 |
+ | 1.3825 | 2.7714 | 300 | 1.6704 | -15.1684 | -16.0433 | 0.6293 | 0.8749 | -160.4333 | -151.6842 | -0.3369 | -0.3497 | 0.4209 |
+
+
+ ### Framework versions
+
+ - Transformers 4.44.2
+ - Pytorch 2.3.1
+ - Datasets 2.21.0
+ - Tokenizers 0.19.1
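
The card above lists the CPO hyperparameters but no training code, so the following is a minimal, hypothetical sketch of how such a run could be wired up with TRL's `CPOTrainer` for a TRL release contemporary with Transformers 4.44.2. The dataset name and output directory are placeholders, not taken from this repository.

```python
# Hypothetical reconstruction of the training setup described in the card above.
# The dataset identifiers below are placeholders; the card does not name a dataset.
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer
from trl import CPOConfig, CPOTrainer

base = "meta-llama/Meta-Llama-3.1-8B-Instruct"
model = AutoModelForCausalLM.from_pretrained(base)
tokenizer = AutoTokenizer.from_pretrained(base)

# CPO expects a preference dataset with "prompt", "chosen" and "rejected" columns.
train_dataset = load_dataset("my-org/preference-data", split="train")  # placeholder
eval_dataset = load_dataset("my-org/preference-data", split="test")    # placeholder

args = CPOConfig(
    output_dir="llama3.1-cpo-full",
    learning_rate=5e-7,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    gradient_accumulation_steps=8,  # 8 GPUs x 8 per device x 8 accumulation = 512 effective
    num_train_epochs=3,
    lr_scheduler_type="linear",
    warmup_ratio=0.1,
    seed=42,
    eval_strategy="steps",
    eval_steps=100,
    logging_steps=10,
    save_steps=100,
)

trainer = CPOTrainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,  # newer TRL releases name this argument processing_class
)
trainer.train()
```

This mirrors the listed values (learning rate 5e-7, effective batch size 512, 3 epochs, linear schedule with 0.1 warmup); anything else, including the dataset, is an assumption.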
all_results.json ADDED
@@ -0,0 +1,9 @@
+ {
+ "epoch": 2.9930715935334873,
+ "total_flos": 0.0,
+ "train_loss": 1.6959601876176433,
+ "train_runtime": 15481.5304,
+ "train_samples": 55376,
+ "train_samples_per_second": 10.731,
+ "train_steps_per_second": 0.021
+ }
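
As a sanity check, these throughput figures are internally consistent: train_samples × num_train_epochs / train_runtime ≈ 55376 × 3 / 15481.5 s ≈ 10.73 samples per second, and the 324 optimizer steps recorded in trainer_state.json over the same runtime give ≈ 0.021 steps per second.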
generation_config.json ADDED
@@ -0,0 +1,12 @@
+ {
+ "bos_token_id": 128000,
+ "do_sample": true,
+ "eos_token_id": [
+ 128001,
+ 128008,
+ 128009
+ ],
+ "temperature": 0.6,
+ "top_p": 0.9,
+ "transformers_version": "4.44.2"
+ }
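
Because this generation_config.json ships with the checkpoint, `generate()` samples with temperature 0.6 and top_p 0.9 by default. A minimal inference sketch follows; the repository id is inferred from the model name and may differ.

```python
# Illustrative sketch only; the repository id below is an assumption.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

repo_id = "jbjeong91/llama3.1-cpo-full"  # assumed repository id
tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForCausalLM.from_pretrained(
    repo_id, torch_dtype=torch.bfloat16, device_map="auto"
)

# The saved generation config (do_sample=True, temperature=0.6, top_p=0.9) is
# picked up automatically; pass do_sample=False to generate() to override it.
messages = [{"role": "user", "content": "Summarize what CPO fine-tuning changes about a model."}]
input_ids = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
output = model.generate(input_ids, max_new_tokens=128)
print(tokenizer.decode(output[0][input_ids.shape[-1]:], skip_special_tokens=True))
```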
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:aa1af0b7282e9c2bd9b68ba70416a2305d87e8ebdaf5af0eaac035fcf001e536
+ oid sha256:fbba6127fac218250050243ab66be3c1c7c6cbd18ec570d893c9a3de64f2f343
  size 4976698672
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:f99e7c8fc2a285e2db78029c0822d0b50f59c51210eb0dd3980440e453b925b0
+ oid sha256:b1907f45d71204adbaf4651c583c1c089079d94507104089eaac5fe90670aa5a
  size 4999802720
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a8da86517f9ef4de6c1a1673f8178a4c9e2ac5bd4549a467c7287850beba300a
+ oid sha256:7a542f9171e541e4b3942015f77f3673cdd048521e0ef15f1c2b051b79f1099f
  size 4915916176
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:5402cbfb52fcbca8833a24c4d3b66e1a6fb10e14da514d2a69ba237f9cd5133b
+ oid sha256:58a04f8718decda2a24f5e863cd0e2211188b1919c0a19dc301d5535257064ab
  size 1168138808
runs/Sep05_22-32-35_jjb_prism_dev2/events.out.tfevents.1725543746.jjb_prism_dev2.38742.0 CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:445115c166012b7e6c237e39f41ce32e26f1df3e042aec524fae1afd961d0d79
- size 30128
+ oid sha256:e69eb6571380835bcc1eb6ff3aab8ae48e4d8bb4d9c46f2c8a1effc13bbb4a50
+ size 31964
train_results.json ADDED
@@ -0,0 +1,9 @@
+ {
+ "epoch": 2.9930715935334873,
+ "total_flos": 0.0,
+ "train_loss": 1.6959601876176433,
+ "train_runtime": 15481.5304,
+ "train_samples": 55376,
+ "train_samples_per_second": 10.731,
+ "train_steps_per_second": 0.021
+ }
trainer_state.json ADDED
@@ -0,0 +1,605 @@
+ {
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 2.9930715935334873,
+ "eval_steps": 100,
+ "global_step": 324,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.09237875288683603,
+ "grad_norm": 38.34350007536878,
+ "learning_rate": 1.5151515151515152e-07,
+ "logits/chosen": -0.33047571778297424,
+ "logits/rejected": -0.31439679861068726,
+ "logps/chosen": -268.56201171875,
+ "logps/rejected": -270.61700439453125,
+ "loss": 2.4944,
+ "nll_loss": 0.726706862449646,
+ "rewards/accuracies": 0.5375000238418579,
+ "rewards/chosen": -26.856201171875,
+ "rewards/margins": 0.20550203323364258,
+ "rewards/rejected": -27.061702728271484,
+ "step": 10
+ },
+ {
+ "epoch": 0.18475750577367206,
+ "grad_norm": 39.7882668385144,
+ "learning_rate": 3.0303030303030305e-07,
+ "logits/chosen": -0.3137342929840088,
+ "logits/rejected": -0.2968626618385315,
+ "logps/chosen": -261.27764892578125,
+ "logps/rejected": -261.04803466796875,
+ "loss": 2.5847,
+ "nll_loss": 0.736041247844696,
+ "rewards/accuracies": 0.518750011920929,
+ "rewards/chosen": -26.127761840820312,
+ "rewards/margins": -0.022955775260925293,
+ "rewards/rejected": -26.10480308532715,
+ "step": 20
+ },
+ {
+ "epoch": 0.27713625866050806,
+ "grad_norm": 32.849922759930486,
+ "learning_rate": 4.545454545454545e-07,
+ "logits/chosen": -0.383260041475296,
+ "logits/rejected": -0.3610544204711914,
+ "logps/chosen": -254.9075927734375,
+ "logps/rejected": -254.6737823486328,
+ "loss": 2.4052,
+ "nll_loss": 0.7010518312454224,
+ "rewards/accuracies": 0.515625,
+ "rewards/chosen": -25.490758895874023,
+ "rewards/margins": -0.023382291197776794,
+ "rewards/rejected": -25.467376708984375,
+ "step": 30
+ },
+ {
+ "epoch": 0.3695150115473441,
+ "grad_norm": 26.712148454979943,
+ "learning_rate": 4.879725085910652e-07,
+ "logits/chosen": -0.5479347705841064,
+ "logits/rejected": -0.5087471008300781,
+ "logps/chosen": -220.08718872070312,
+ "logps/rejected": -216.94229125976562,
+ "loss": 2.3725,
+ "nll_loss": 0.6341860890388489,
+ "rewards/accuracies": 0.5015624761581421,
+ "rewards/chosen": -22.008716583251953,
+ "rewards/margins": -0.3144901692867279,
+ "rewards/rejected": -21.69422721862793,
+ "step": 40
+ },
+ {
+ "epoch": 0.4618937644341801,
+ "grad_norm": 26.784049649942634,
+ "learning_rate": 4.707903780068728e-07,
+ "logits/chosen": -0.8294746279716492,
+ "logits/rejected": -0.8073676228523254,
+ "logps/chosen": -196.47360229492188,
+ "logps/rejected": -196.55337524414062,
+ "loss": 2.0929,
+ "nll_loss": 0.5368759036064148,
+ "rewards/accuracies": 0.5296875238418579,
+ "rewards/chosen": -19.64735984802246,
+ "rewards/margins": 0.007975578308105469,
+ "rewards/rejected": -19.655336380004883,
+ "step": 50
+ },
+ {
+ "epoch": 0.5542725173210161,
+ "grad_norm": 28.025237769650065,
+ "learning_rate": 4.536082474226804e-07,
+ "logits/chosen": -0.6816179752349854,
+ "logits/rejected": -0.6509512066841125,
+ "logps/chosen": -175.80374145507812,
+ "logps/rejected": -176.0839385986328,
+ "loss": 2.0271,
+ "nll_loss": 0.46367818117141724,
+ "rewards/accuracies": 0.510937511920929,
+ "rewards/chosen": -17.580373764038086,
+ "rewards/margins": 0.028019297868013382,
+ "rewards/rejected": -17.608394622802734,
+ "step": 60
+ },
+ {
+ "epoch": 0.6466512702078522,
+ "grad_norm": 26.448824948400027,
+ "learning_rate": 4.3642611683848796e-07,
+ "logits/chosen": -0.5208871364593506,
+ "logits/rejected": -0.4965832233428955,
+ "logps/chosen": -160.55596923828125,
+ "logps/rejected": -162.43707275390625,
+ "loss": 1.908,
+ "nll_loss": 0.4267793595790863,
+ "rewards/accuracies": 0.5249999761581421,
+ "rewards/chosen": -16.05559730529785,
+ "rewards/margins": 0.1881115734577179,
+ "rewards/rejected": -16.24370765686035,
+ "step": 70
+ },
+ {
+ "epoch": 0.7390300230946882,
+ "grad_norm": 25.832913188032137,
+ "learning_rate": 4.1924398625429554e-07,
+ "logits/chosen": -0.42753878235816956,
+ "logits/rejected": -0.4124147295951843,
+ "logps/chosen": -153.801513671875,
+ "logps/rejected": -158.33753967285156,
+ "loss": 1.8956,
+ "nll_loss": 0.4220770001411438,
+ "rewards/accuracies": 0.542187511920929,
+ "rewards/chosen": -15.380151748657227,
+ "rewards/margins": 0.453604519367218,
+ "rewards/rejected": -15.833755493164062,
+ "step": 80
+ },
+ {
+ "epoch": 0.8314087759815243,
+ "grad_norm": 27.79297058622181,
+ "learning_rate": 4.020618556701031e-07,
+ "logits/chosen": -0.4597485661506653,
+ "logits/rejected": -0.4340926706790924,
+ "logps/chosen": -150.138427734375,
+ "logps/rejected": -151.1810760498047,
+ "loss": 1.8861,
+ "nll_loss": 0.4107755124568939,
+ "rewards/accuracies": 0.5406249761581421,
+ "rewards/chosen": -15.013842582702637,
+ "rewards/margins": 0.10426414012908936,
+ "rewards/rejected": -15.1181058883667,
+ "step": 90
+ },
+ {
+ "epoch": 0.9237875288683602,
+ "grad_norm": 25.1491914386423,
+ "learning_rate": 3.8487972508591063e-07,
+ "logits/chosen": -0.5064208507537842,
+ "logits/rejected": -0.4822482168674469,
+ "logps/chosen": -159.95938110351562,
+ "logps/rejected": -161.27655029296875,
+ "loss": 1.822,
+ "nll_loss": 0.41467323899269104,
+ "rewards/accuracies": 0.5249999761581421,
+ "rewards/chosen": -15.995938301086426,
+ "rewards/margins": 0.131715327501297,
+ "rewards/rejected": -16.127653121948242,
+ "step": 100
+ },
+ {
+ "epoch": 0.9237875288683602,
+ "eval_logits/chosen": -0.4379667639732361,
+ "eval_logits/rejected": -0.42346981167793274,
+ "eval_logps/chosen": -146.49607849121094,
+ "eval_logps/rejected": -154.26937866210938,
+ "eval_loss": 1.7790985107421875,
+ "eval_nll_loss": 0.4057552218437195,
+ "eval_rewards/accuracies": 0.6034482717514038,
+ "eval_rewards/chosen": -14.649608612060547,
+ "eval_rewards/margins": 0.777328372001648,
+ "eval_rewards/rejected": -15.4269380569458,
+ "eval_runtime": 65.2011,
+ "eval_samples_per_second": 28.006,
+ "eval_steps_per_second": 0.445,
+ "step": 100
+ },
+ {
+ "epoch": 1.0161662817551964,
+ "grad_norm": 24.357108466796436,
+ "learning_rate": 3.676975945017182e-07,
+ "logits/chosen": -0.44147372245788574,
+ "logits/rejected": -0.4213744103908539,
+ "logps/chosen": -148.9695281982422,
+ "logps/rejected": -151.85446166992188,
+ "loss": 1.7788,
+ "nll_loss": 0.40945443511009216,
+ "rewards/accuracies": 0.5687500238418579,
+ "rewards/chosen": -14.896951675415039,
+ "rewards/margins": 0.2884957790374756,
+ "rewards/rejected": -15.185447692871094,
+ "step": 110
+ },
+ {
+ "epoch": 1.1085450346420322,
+ "grad_norm": 30.225216765479118,
+ "learning_rate": 3.5051546391752573e-07,
+ "logits/chosen": -0.41636085510253906,
+ "logits/rejected": -0.38961413502693176,
+ "logps/chosen": -149.9561309814453,
+ "logps/rejected": -154.90982055664062,
+ "loss": 1.6408,
+ "nll_loss": 0.40735840797424316,
+ "rewards/accuracies": 0.5953124761581421,
+ "rewards/chosen": -14.995613098144531,
+ "rewards/margins": 0.49536871910095215,
+ "rewards/rejected": -15.490982055664062,
+ "step": 120
+ },
+ {
+ "epoch": 1.2009237875288683,
+ "grad_norm": 25.661477968018204,
+ "learning_rate": 3.333333333333333e-07,
+ "logits/chosen": -0.39541321992874146,
+ "logits/rejected": -0.36797264218330383,
+ "logps/chosen": -143.75636291503906,
+ "logps/rejected": -149.67919921875,
+ "loss": 1.6412,
+ "nll_loss": 0.4088224768638611,
+ "rewards/accuracies": 0.604687511920929,
+ "rewards/chosen": -14.375636100769043,
+ "rewards/margins": 0.5922830700874329,
+ "rewards/rejected": -14.967920303344727,
+ "step": 130
+ },
+ {
+ "epoch": 1.2933025404157044,
+ "grad_norm": 24.629268500456213,
+ "learning_rate": 3.161512027491409e-07,
+ "logits/chosen": -0.4200739860534668,
+ "logits/rejected": -0.40387552976608276,
+ "logps/chosen": -154.5819091796875,
+ "logps/rejected": -162.4684600830078,
+ "loss": 1.5851,
+ "nll_loss": 0.42036017775535583,
+ "rewards/accuracies": 0.604687511920929,
+ "rewards/chosen": -15.45819091796875,
+ "rewards/margins": 0.7886544466018677,
+ "rewards/rejected": -16.246845245361328,
+ "step": 140
+ },
+ {
+ "epoch": 1.3856812933025404,
+ "grad_norm": 27.244637011376536,
+ "learning_rate": 2.9896907216494845e-07,
+ "logits/chosen": -0.4014149606227875,
+ "logits/rejected": -0.38134342432022095,
+ "logps/chosen": -157.56259155273438,
+ "logps/rejected": -163.28109741210938,
+ "loss": 1.6163,
+ "nll_loss": 0.42149510979652405,
+ "rewards/accuracies": 0.5953124761581421,
+ "rewards/chosen": -15.756260871887207,
+ "rewards/margins": 0.5718507170677185,
+ "rewards/rejected": -16.32811164855957,
+ "step": 150
+ },
+ {
+ "epoch": 1.4780600461893765,
+ "grad_norm": 48.54509039980329,
+ "learning_rate": 2.81786941580756e-07,
+ "logits/chosen": -0.4404594302177429,
+ "logits/rejected": -0.43164220452308655,
+ "logps/chosen": -162.8580780029297,
+ "logps/rejected": -169.2598876953125,
+ "loss": 1.572,
+ "nll_loss": 0.4240867495536804,
+ "rewards/accuracies": 0.596875011920929,
+ "rewards/chosen": -16.285808563232422,
+ "rewards/margins": 0.6401800513267517,
+ "rewards/rejected": -16.925989151000977,
+ "step": 160
+ },
+ {
+ "epoch": 1.5704387990762125,
+ "grad_norm": 26.569985559411176,
+ "learning_rate": 2.6460481099656354e-07,
+ "logits/chosen": -0.41170358657836914,
+ "logits/rejected": -0.40014591813087463,
+ "logps/chosen": -152.54824829101562,
+ "logps/rejected": -160.4109344482422,
+ "loss": 1.5587,
+ "nll_loss": 0.4169366955757141,
+ "rewards/accuracies": 0.6390625238418579,
+ "rewards/chosen": -15.2548246383667,
+ "rewards/margins": 0.7862688302993774,
+ "rewards/rejected": -16.041095733642578,
+ "step": 170
+ },
+ {
+ "epoch": 1.6628175519630486,
+ "grad_norm": 24.616859305838048,
+ "learning_rate": 2.474226804123711e-07,
+ "logits/chosen": -0.4424857497215271,
+ "logits/rejected": -0.43130555748939514,
+ "logps/chosen": -153.38320922851562,
+ "logps/rejected": -157.69728088378906,
+ "loss": 1.531,
+ "nll_loss": 0.4121263921260834,
+ "rewards/accuracies": 0.5703125,
+ "rewards/chosen": -15.3383207321167,
+ "rewards/margins": 0.43140602111816406,
+ "rewards/rejected": -15.76972770690918,
+ "step": 180
+ },
+ {
+ "epoch": 1.7551963048498846,
+ "grad_norm": 24.22918462233095,
+ "learning_rate": 2.3024054982817866e-07,
+ "logits/chosen": -0.40492838621139526,
+ "logits/rejected": -0.3852563202381134,
+ "logps/chosen": -155.97390747070312,
+ "logps/rejected": -163.59666442871094,
+ "loss": 1.5443,
+ "nll_loss": 0.4084969162940979,
+ "rewards/accuracies": 0.596875011920929,
+ "rewards/chosen": -15.597391128540039,
+ "rewards/margins": 0.7622756958007812,
+ "rewards/rejected": -16.359668731689453,
+ "step": 190
+ },
+ {
+ "epoch": 1.8475750577367207,
+ "grad_norm": 24.111596988391938,
+ "learning_rate": 2.1305841924398624e-07,
+ "logits/chosen": -0.38298338651657104,
+ "logits/rejected": -0.35016077756881714,
+ "logps/chosen": -148.51443481445312,
+ "logps/rejected": -155.7366943359375,
+ "loss": 1.5612,
+ "nll_loss": 0.41300660371780396,
+ "rewards/accuracies": 0.590624988079071,
+ "rewards/chosen": -14.85144329071045,
+ "rewards/margins": 0.7222263813018799,
+ "rewards/rejected": -15.573671340942383,
+ "step": 200
+ },
+ {
+ "epoch": 1.8475750577367207,
+ "eval_logits/chosen": -0.38625869154930115,
+ "eval_logits/rejected": -0.3721800148487091,
+ "eval_logps/chosen": -151.33670043945312,
+ "eval_logps/rejected": -159.72564697265625,
+ "eval_loss": 1.6871463060379028,
+ "eval_nll_loss": 0.419677197933197,
+ "eval_rewards/accuracies": 0.6379310488700867,
+ "eval_rewards/chosen": -15.133668899536133,
+ "eval_rewards/margins": 0.8388964533805847,
+ "eval_rewards/rejected": -15.972565650939941,
+ "eval_runtime": 44.5152,
+ "eval_samples_per_second": 41.02,
+ "eval_steps_per_second": 0.651,
+ "step": 200
+ },
+ {
+ "epoch": 1.9399538106235565,
+ "grad_norm": 24.485330144648206,
+ "learning_rate": 1.958762886597938e-07,
+ "logits/chosen": -0.3989901542663574,
+ "logits/rejected": -0.38505780696868896,
+ "logps/chosen": -154.37796020507812,
+ "logps/rejected": -161.5634307861328,
+ "loss": 1.5471,
+ "nll_loss": 0.42780718207359314,
+ "rewards/accuracies": 0.6109374761581421,
+ "rewards/chosen": -15.437795639038086,
+ "rewards/margins": 0.7185462713241577,
+ "rewards/rejected": -16.15634536743164,
+ "step": 210
+ },
+ {
+ "epoch": 2.032332563510393,
+ "grad_norm": 23.912915890804598,
+ "learning_rate": 1.7869415807560136e-07,
+ "logits/chosen": -0.4208546578884125,
+ "logits/rejected": -0.4081268310546875,
+ "logps/chosen": -150.35691833496094,
+ "logps/rejected": -160.062744140625,
+ "loss": 1.4932,
+ "nll_loss": 0.4046563506126404,
+ "rewards/accuracies": 0.620312511920929,
+ "rewards/chosen": -15.035693168640137,
+ "rewards/margins": 0.970583438873291,
+ "rewards/rejected": -16.006277084350586,
+ "step": 220
+ },
+ {
+ "epoch": 2.1247113163972284,
+ "grad_norm": 28.319868627323874,
+ "learning_rate": 1.6151202749140893e-07,
+ "logits/chosen": -0.4150509834289551,
+ "logits/rejected": -0.39563247561454773,
+ "logps/chosen": -154.30528259277344,
+ "logps/rejected": -164.85025024414062,
+ "loss": 1.3917,
+ "nll_loss": 0.424283504486084,
+ "rewards/accuracies": 0.640625,
+ "rewards/chosen": -15.43052864074707,
+ "rewards/margins": 1.0544955730438232,
+ "rewards/rejected": -16.48502540588379,
+ "step": 230
+ },
+ {
+ "epoch": 2.2170900692840645,
+ "grad_norm": 26.100118895427645,
+ "learning_rate": 1.4432989690721648e-07,
+ "logits/chosen": -0.3663300573825836,
+ "logits/rejected": -0.3529093861579895,
+ "logps/chosen": -153.01861572265625,
+ "logps/rejected": -165.33999633789062,
+ "loss": 1.3738,
+ "nll_loss": 0.40894705057144165,
+ "rewards/accuracies": 0.6796875,
+ "rewards/chosen": -15.301861763000488,
+ "rewards/margins": 1.2321385145187378,
+ "rewards/rejected": -16.53400230407715,
+ "step": 240
+ },
+ {
+ "epoch": 2.3094688221709005,
+ "grad_norm": 31.011772944003695,
+ "learning_rate": 1.2714776632302405e-07,
+ "logits/chosen": -0.4251771867275238,
+ "logits/rejected": -0.4077603816986084,
+ "logps/chosen": -159.03237915039062,
+ "logps/rejected": -167.05409240722656,
+ "loss": 1.3875,
+ "nll_loss": 0.4267016053199768,
+ "rewards/accuracies": 0.625,
+ "rewards/chosen": -15.903238296508789,
+ "rewards/margins": 0.8021726608276367,
+ "rewards/rejected": -16.70541000366211,
+ "step": 250
+ },
+ {
+ "epoch": 2.4018475750577366,
+ "grad_norm": 27.88691245436523,
+ "learning_rate": 1.099656357388316e-07,
+ "logits/chosen": -0.3865527808666229,
+ "logits/rejected": -0.3643147349357605,
+ "logps/chosen": -153.9661865234375,
+ "logps/rejected": -164.27066040039062,
+ "loss": 1.4061,
+ "nll_loss": 0.409515380859375,
+ "rewards/accuracies": 0.653124988079071,
+ "rewards/chosen": -15.39661693572998,
+ "rewards/margins": 1.0304476022720337,
+ "rewards/rejected": -16.427064895629883,
+ "step": 260
+ },
+ {
+ "epoch": 2.4942263279445727,
+ "grad_norm": 36.88845169314625,
+ "learning_rate": 9.278350515463918e-08,
+ "logits/chosen": -0.41444501280784607,
+ "logits/rejected": -0.3972172141075134,
+ "logps/chosen": -155.81336975097656,
+ "logps/rejected": -167.61431884765625,
+ "loss": 1.3905,
+ "nll_loss": 0.4134409427642822,
+ "rewards/accuracies": 0.6703125238418579,
+ "rewards/chosen": -15.581338882446289,
+ "rewards/margins": 1.1800928115844727,
+ "rewards/rejected": -16.761430740356445,
+ "step": 270
+ },
+ {
+ "epoch": 2.5866050808314087,
+ "grad_norm": 25.23086170545782,
+ "learning_rate": 7.560137457044672e-08,
+ "logits/chosen": -0.36893123388290405,
+ "logits/rejected": -0.35938116908073425,
+ "logps/chosen": -149.61871337890625,
+ "logps/rejected": -161.56121826171875,
+ "loss": 1.3643,
+ "nll_loss": 0.4170606732368469,
+ "rewards/accuracies": 0.667187511920929,
+ "rewards/chosen": -14.961870193481445,
+ "rewards/margins": 1.1942520141601562,
+ "rewards/rejected": -16.1561222076416,
+ "step": 280
+ },
+ {
+ "epoch": 2.678983833718245,
+ "grad_norm": 27.86871548844565,
+ "learning_rate": 5.8419243986254297e-08,
+ "logits/chosen": -0.39300569891929626,
+ "logits/rejected": -0.37821659445762634,
+ "logps/chosen": -158.05575561523438,
+ "logps/rejected": -168.22007751464844,
+ "loss": 1.3372,
+ "nll_loss": 0.4216877520084381,
+ "rewards/accuracies": 0.6328125,
+ "rewards/chosen": -15.805575370788574,
+ "rewards/margins": 1.0164330005645752,
+ "rewards/rejected": -16.822010040283203,
+ "step": 290
+ },
+ {
+ "epoch": 2.771362586605081,
+ "grad_norm": 23.796037905801665,
+ "learning_rate": 4.123711340206185e-08,
+ "logits/chosen": -0.3558502793312073,
+ "logits/rejected": -0.36145851016044617,
+ "logps/chosen": -145.80899047851562,
+ "logps/rejected": -159.22427368164062,
+ "loss": 1.3825,
+ "nll_loss": 0.42257922887802124,
+ "rewards/accuracies": 0.6859375238418579,
+ "rewards/chosen": -14.580899238586426,
+ "rewards/margins": 1.3415263891220093,
+ "rewards/rejected": -15.9224271774292,
+ "step": 300
+ },
+ {
+ "epoch": 2.771362586605081,
+ "eval_logits/chosen": -0.34973594546318054,
+ "eval_logits/rejected": -0.3369295001029968,
+ "eval_logps/chosen": -151.68421936035156,
+ "eval_logps/rejected": -160.43328857421875,
+ "eval_loss": 1.6704407930374146,
+ "eval_nll_loss": 0.4208527207374573,
+ "eval_rewards/accuracies": 0.6293103694915771,
+ "eval_rewards/chosen": -15.168424606323242,
+ "eval_rewards/margins": 0.8749059438705444,
+ "eval_rewards/rejected": -16.04332733154297,
+ "eval_runtime": 42.0278,
+ "eval_samples_per_second": 43.447,
+ "eval_steps_per_second": 0.69,
+ "step": 300
+ },
+ {
+ "epoch": 2.863741339491917,
+ "grad_norm": 26.44341401066327,
+ "learning_rate": 2.4054982817869415e-08,
+ "logits/chosen": -0.35747581720352173,
+ "logits/rejected": -0.34428220987319946,
+ "logps/chosen": -149.22958374023438,
+ "logps/rejected": -160.0894317626953,
+ "loss": 1.408,
+ "nll_loss": 0.41082078218460083,
+ "rewards/accuracies": 0.6468750238418579,
+ "rewards/chosen": -14.922956466674805,
+ "rewards/margins": 1.0859849452972412,
+ "rewards/rejected": -16.008943557739258,
+ "step": 310
+ },
+ {
+ "epoch": 2.956120092378753,
+ "grad_norm": 27.055661510056673,
+ "learning_rate": 6.872852233676975e-09,
+ "logits/chosen": -0.3484032452106476,
+ "logits/rejected": -0.3378998041152954,
+ "logps/chosen": -155.7621307373047,
+ "logps/rejected": -166.55508422851562,
+ "loss": 1.3769,
+ "nll_loss": 0.41843119263648987,
+ "rewards/accuracies": 0.660937488079071,
+ "rewards/chosen": -15.576214790344238,
+ "rewards/margins": 1.0792920589447021,
+ "rewards/rejected": -16.655506134033203,
+ "step": 320
+ },
+ {
+ "epoch": 2.9930715935334873,
+ "step": 324,
+ "total_flos": 0.0,
+ "train_loss": 1.6959601876176433,
+ "train_runtime": 15481.5304,
+ "train_samples_per_second": 10.731,
+ "train_steps_per_second": 0.021
+ }
+ ],
+ "logging_steps": 10,
+ "max_steps": 324,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 3,
+ "save_steps": 100,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": true
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 0.0,
+ "train_batch_size": 8,
+ "trial_name": null,
+ "trial_params": null
+ }
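
The `log_history` array above holds both the per-10-step training logs and the step 100/200/300 evaluation logs. A small illustrative helper for pulling the loss curves back out of this file (the path is assumed to be the checkpoint's `trainer_state.json`):

```python
# Illustrative only: reads the trainer state shown above and separates the
# training-loss entries from the evaluation entries.
import json

with open("trainer_state.json") as f:
    state = json.load(f)

train_loss = [(e["step"], e["loss"]) for e in state["log_history"] if "loss" in e]
eval_loss = [(e["step"], e["eval_loss"]) for e in state["log_history"] if "eval_loss" in e]

print("last train logs:", train_loss[-3:])  # steps 300, 310, 320
print("eval logs:", eval_loss)              # steps 100, 200, 300
```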