jena-shreyas committed on
Commit
aecbf17
·
verified ·
1 Parent(s): b828a79

Upload folder using huggingface_hub

Browse files
README.md ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: peft
3
+ ---
4
+ ## Training procedure
5
+
6
+ ### Framework versions
7
+
8
+
9
+ - PEFT 0.4.0
adapter_config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "auto_mapping": null,
3
+ "base_model_name_or_path": "DAMO-NLP-SG/VideoLLaMA3-7B",
4
+ "bias": "none",
5
+ "fan_in_fan_out": false,
6
+ "inference_mode": true,
7
+ "init_lora_weights": true,
8
+ "layers_pattern": null,
9
+ "layers_to_transform": null,
10
+ "lora_alpha": 256,
11
+ "lora_dropout": 0.05,
12
+ "modules_to_save": null,
13
+ "peft_type": "LORA",
14
+ "r": 128,
15
+ "revision": null,
16
+ "target_modules": [
17
+ "k_proj",
18
+ "v_proj",
19
+ "o_proj",
20
+ "q_proj",
21
+ "out_proj",
22
+ "fc1",
23
+ "down_proj",
24
+ "fc2",
25
+ "gate_proj",
26
+ "up_proj"
27
+ ],
28
+ "task_type": "CAUSAL_LM"
29
+ }
adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ed82dac1b34aae060b966febf7813ff0ae7175570bf89edc6b268991ab480d2
3
+ size 785303482
config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "X": [
3
+ "Image",
4
+ "Video"
5
+ ],
6
+ "_attn_implementation_autoset": true,
7
+ "_name_or_path": "DAMO-NLP-SG/VideoLLaMA3-7B",
8
+ "architectures": [
9
+ "Videollama3Qwen2ForCausalLM"
10
+ ],
11
+ "attention_dropout": 0.0,
12
+ "auto_map": {
13
+ "AutoConfig": "DAMO-NLP-SG/VideoLLaMA3-7B--configuration_videollama3.Videollama3Qwen2Config",
14
+ "AutoModelForCausalLM": "DAMO-NLP-SG/VideoLLaMA3-7B--modeling_videollama3.Videollama3Qwen2ForCausalLM"
15
+ },
16
+ "bos_token_id": 151643,
17
+ "eos_token_id": 151645,
18
+ "freeze_mm_mlp_adapter": true,
19
+ "hidden_act": "silu",
20
+ "hidden_size": 3584,
21
+ "image_aspect_ratio": "pad",
22
+ "image_grid_pinpoints": null,
23
+ "image_token_index": 151665,
24
+ "initializer_range": 0.02,
25
+ "intermediate_size": 18944,
26
+ "max_position_embeddings": 32768,
27
+ "max_window_layers": 28,
28
+ "mm_projector_type": "mlp2x_gelu",
29
+ "mm_use_x_patch_token": false,
30
+ "mm_use_x_start_end": false,
31
+ "model_type": "videollama3_qwen2",
32
+ "num_attention_heads": 28,
33
+ "num_hidden_layers": 28,
34
+ "num_key_value_heads": 4,
35
+ "rms_norm_eps": 1e-06,
36
+ "rope_scaling": null,
37
+ "rope_theta": 1000000.0,
38
+ "sliding_window": null,
39
+ "tie_word_embeddings": false,
40
+ "torch_dtype": "bfloat16",
41
+ "transformers_version": "4.46.3",
42
+ "tune_mm_mlp_adapter": false,
43
+ "use_cache": true,
44
+ "use_sliding_window": false,
45
+ "use_token_compression": false,
46
+ "vision_encoder": null,
47
+ "vision_encoder_config": {
48
+ "hidden_size": 1152,
49
+ "intermediate_size": 4304,
50
+ "model_type": "videollama3_vision_encoder",
51
+ "num_attention_heads": 16,
52
+ "num_hidden_layers": 27,
53
+ "patch_size": 14
54
+ },
55
+ "vocab_size": 152064
56
+ }
gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
non_lora_trainables.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60fb82c3660319e6d0b239950b20c28181e97f1ade117dc0660b40e2ad94a89b
3
+ size 912
trainer_state.json ADDED
@@ -0,0 +1,847 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 3.0,
5
+ "eval_steps": 500,
6
+ "global_step": 1200,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.06,
13
+ "learning_rate": 1.0416666666666667e-07,
14
+ "logps/chosen": -50.84611511230469,
15
+ "logps/rejected": -68.16911315917969,
16
+ "loss": 0.7128,
17
+ "losses/dpo": 0.7152940630912781,
18
+ "losses/sft": 1.772851586341858,
19
+ "losses/total": 0.7152940630912781,
20
+ "ref_logps/chosen": -50.76835632324219,
21
+ "ref_logps/rejected": -68.15713500976562,
22
+ "rewards/accuracies": 0.4920000433921814,
23
+ "rewards/chosen": -0.007775493897497654,
24
+ "rewards/margins": -0.006577868480235338,
25
+ "rewards/rejected": -0.0011976259993389249,
26
+ "step": 25
27
+ },
28
+ {
29
+ "epoch": 0.12,
30
+ "learning_rate": 2.0833333333333333e-07,
31
+ "logps/chosen": -51.992218017578125,
32
+ "logps/rejected": -67.8126220703125,
33
+ "loss": 0.7059,
34
+ "losses/dpo": 0.7131974697113037,
35
+ "losses/sft": 1.781252384185791,
36
+ "losses/total": 0.7131974697113037,
37
+ "ref_logps/chosen": -52.02339172363281,
38
+ "ref_logps/rejected": -67.7878646850586,
39
+ "rewards/accuracies": 0.5110000371932983,
40
+ "rewards/chosen": 0.003117597894743085,
41
+ "rewards/margins": 0.005593638867139816,
42
+ "rewards/rejected": -0.0024760416708886623,
43
+ "step": 50
44
+ },
45
+ {
46
+ "epoch": 0.19,
47
+ "learning_rate": 3.1249999999999997e-07,
48
+ "logps/chosen": -52.34601593017578,
49
+ "logps/rejected": -67.67086029052734,
50
+ "loss": 0.7097,
51
+ "losses/dpo": 0.7076306939125061,
52
+ "losses/sft": 1.7903097867965698,
53
+ "losses/total": 0.7076306939125061,
54
+ "ref_logps/chosen": -52.40769577026367,
55
+ "ref_logps/rejected": -67.70091247558594,
56
+ "rewards/accuracies": 0.49650001525878906,
57
+ "rewards/chosen": 0.006168678868561983,
58
+ "rewards/margins": 0.003163211978971958,
59
+ "rewards/rejected": 0.003005466191098094,
60
+ "step": 75
61
+ },
62
+ {
63
+ "epoch": 0.25,
64
+ "learning_rate": 4.1666666666666667e-07,
65
+ "logps/chosen": -51.974483489990234,
66
+ "logps/rejected": -68.44440460205078,
67
+ "loss": 0.7115,
68
+ "losses/dpo": 0.7194903492927551,
69
+ "losses/sft": 1.777998924255371,
70
+ "losses/total": 0.7194903492927551,
71
+ "ref_logps/chosen": -51.95515441894531,
72
+ "ref_logps/rejected": -68.4430923461914,
73
+ "rewards/accuracies": 0.4794999957084656,
74
+ "rewards/chosen": -0.0019327354384586215,
75
+ "rewards/margins": -0.0018010810017585754,
76
+ "rewards/rejected": -0.00013165354903321713,
77
+ "step": 100
78
+ },
79
+ {
80
+ "epoch": 0.31,
81
+ "learning_rate": 4.976851851851852e-07,
82
+ "logps/chosen": -51.21366882324219,
83
+ "logps/rejected": -68.10318756103516,
84
+ "loss": 0.7096,
85
+ "losses/dpo": 0.7016645073890686,
86
+ "losses/sft": 1.7784287929534912,
87
+ "losses/total": 0.7016645073890686,
88
+ "ref_logps/chosen": -51.175315856933594,
89
+ "ref_logps/rejected": -68.03705596923828,
90
+ "rewards/accuracies": 0.5074999928474426,
91
+ "rewards/chosen": -0.003834722563624382,
92
+ "rewards/margins": 0.002778256544843316,
93
+ "rewards/rejected": -0.006612981669604778,
94
+ "step": 125
95
+ },
96
+ {
97
+ "epoch": 0.38,
98
+ "learning_rate": 4.861111111111111e-07,
99
+ "logps/chosen": -49.84953689575195,
100
+ "logps/rejected": -66.59363555908203,
101
+ "loss": 0.6989,
102
+ "losses/dpo": 0.6881049275398254,
103
+ "losses/sft": 1.7335822582244873,
104
+ "losses/total": 0.6881049275398254,
105
+ "ref_logps/chosen": -49.83342361450195,
106
+ "ref_logps/rejected": -66.36032104492188,
107
+ "rewards/accuracies": 0.5224999785423279,
108
+ "rewards/chosen": -0.0016115426551550627,
109
+ "rewards/margins": 0.021719880402088165,
110
+ "rewards/rejected": -0.023331422358751297,
111
+ "step": 150
112
+ },
113
+ {
114
+ "epoch": 0.44,
115
+ "learning_rate": 4.74537037037037e-07,
116
+ "logps/chosen": -51.20610427856445,
117
+ "logps/rejected": -68.37096405029297,
118
+ "loss": 0.6965,
119
+ "losses/dpo": 0.6989789605140686,
120
+ "losses/sft": 1.8248052597045898,
121
+ "losses/total": 0.6989789605140686,
122
+ "ref_logps/chosen": -51.07723617553711,
123
+ "ref_logps/rejected": -67.93907165527344,
124
+ "rewards/accuracies": 0.5259999632835388,
125
+ "rewards/chosen": -0.012886175885796547,
126
+ "rewards/margins": 0.0303032249212265,
127
+ "rewards/rejected": -0.0431894026696682,
128
+ "step": 175
129
+ },
130
+ {
131
+ "epoch": 0.5,
132
+ "learning_rate": 4.6296296296296297e-07,
133
+ "logps/chosen": -53.1021842956543,
134
+ "logps/rejected": -69.64921569824219,
135
+ "loss": 0.6821,
136
+ "losses/dpo": 0.6905575394630432,
137
+ "losses/sft": 1.7901018857955933,
138
+ "losses/total": 0.6905575394630432,
139
+ "ref_logps/chosen": -52.89280319213867,
140
+ "ref_logps/rejected": -68.83776092529297,
141
+ "rewards/accuracies": 0.5610000491142273,
142
+ "rewards/chosen": -0.02093815989792347,
143
+ "rewards/margins": 0.0602080263197422,
144
+ "rewards/rejected": -0.08114618062973022,
145
+ "step": 200
146
+ },
147
+ {
148
+ "epoch": 0.56,
149
+ "learning_rate": 4.513888888888889e-07,
150
+ "logps/chosen": -51.60838317871094,
151
+ "logps/rejected": -68.19721984863281,
152
+ "loss": 0.6783,
153
+ "losses/dpo": 0.6758668422698975,
154
+ "losses/sft": 1.7487859725952148,
155
+ "losses/total": 0.6758668422698975,
156
+ "ref_logps/chosen": -51.219181060791016,
157
+ "ref_logps/rejected": -67.0694580078125,
158
+ "rewards/accuracies": 0.5529999732971191,
159
+ "rewards/chosen": -0.03891964256763458,
160
+ "rewards/margins": 0.0738559141755104,
161
+ "rewards/rejected": -0.11277556419372559,
162
+ "step": 225
163
+ },
164
+ {
165
+ "epoch": 0.62,
166
+ "learning_rate": 4.398148148148148e-07,
167
+ "logps/chosen": -51.5633544921875,
168
+ "logps/rejected": -70.04096221923828,
169
+ "loss": 0.6606,
170
+ "losses/dpo": 0.6762534976005554,
171
+ "losses/sft": 1.7945468425750732,
172
+ "losses/total": 0.6762534976005554,
173
+ "ref_logps/chosen": -51.164485931396484,
174
+ "ref_logps/rejected": -68.47702026367188,
175
+ "rewards/accuracies": 0.5925000309944153,
176
+ "rewards/chosen": -0.03988657891750336,
177
+ "rewards/margins": 0.11650750786066055,
178
+ "rewards/rejected": -0.1563940793275833,
179
+ "step": 250
180
+ },
181
+ {
182
+ "epoch": 0.69,
183
+ "learning_rate": 4.2824074074074075e-07,
184
+ "logps/chosen": -52.760066986083984,
185
+ "logps/rejected": -71.27080535888672,
186
+ "loss": 0.6541,
187
+ "losses/dpo": 0.6559135317802429,
188
+ "losses/sft": 1.8313995599746704,
189
+ "losses/total": 0.6559135317802429,
190
+ "ref_logps/chosen": -52.14683151245117,
191
+ "ref_logps/rejected": -69.2181625366211,
192
+ "rewards/accuracies": 0.6005000472068787,
193
+ "rewards/chosen": -0.0613233856856823,
194
+ "rewards/margins": 0.14394104480743408,
195
+ "rewards/rejected": -0.20526443421840668,
196
+ "step": 275
197
+ },
198
+ {
199
+ "epoch": 0.75,
200
+ "learning_rate": 4.1666666666666667e-07,
201
+ "logps/chosen": -53.022369384765625,
202
+ "logps/rejected": -70.22279357910156,
203
+ "loss": 0.6444,
204
+ "losses/dpo": 0.6332589387893677,
205
+ "losses/sft": 1.8123092651367188,
206
+ "losses/total": 0.6332589387893677,
207
+ "ref_logps/chosen": -52.272945404052734,
208
+ "ref_logps/rejected": -67.84944152832031,
209
+ "rewards/accuracies": 0.6039999723434448,
210
+ "rewards/chosen": -0.07494210451841354,
211
+ "rewards/margins": 0.16239266097545624,
212
+ "rewards/rejected": -0.2373347282409668,
213
+ "step": 300
214
+ },
215
+ {
216
+ "epoch": 0.81,
217
+ "learning_rate": 4.050925925925926e-07,
218
+ "logps/chosen": -52.53852462768555,
219
+ "logps/rejected": -69.21473693847656,
220
+ "loss": 0.6472,
221
+ "losses/dpo": 0.6568192839622498,
222
+ "losses/sft": 1.808381199836731,
223
+ "losses/total": 0.6568192839622498,
224
+ "ref_logps/chosen": -51.50153350830078,
225
+ "ref_logps/rejected": -66.50575256347656,
226
+ "rewards/accuracies": 0.6184999942779541,
227
+ "rewards/chosen": -0.10369860380887985,
228
+ "rewards/margins": 0.1671994924545288,
229
+ "rewards/rejected": -0.27089810371398926,
230
+ "step": 325
231
+ },
232
+ {
233
+ "epoch": 0.88,
234
+ "learning_rate": 3.9351851851851854e-07,
235
+ "logps/chosen": -52.68033218383789,
236
+ "logps/rejected": -69.9216079711914,
237
+ "loss": 0.6322,
238
+ "losses/dpo": 0.6414695978164673,
239
+ "losses/sft": 1.7517987489700317,
240
+ "losses/total": 0.6414695978164673,
241
+ "ref_logps/chosen": -51.4389533996582,
242
+ "ref_logps/rejected": -66.67315673828125,
243
+ "rewards/accuracies": 0.6355000138282776,
244
+ "rewards/chosen": -0.12413790076971054,
245
+ "rewards/margins": 0.20070795714855194,
246
+ "rewards/rejected": -0.3248458802700043,
247
+ "step": 350
248
+ },
249
+ {
250
+ "epoch": 0.94,
251
+ "learning_rate": 3.819444444444444e-07,
252
+ "logps/chosen": -53.01972579956055,
253
+ "logps/rejected": -69.83106231689453,
254
+ "loss": 0.6254,
255
+ "losses/dpo": 0.6416024565696716,
256
+ "losses/sft": 1.806755781173706,
257
+ "losses/total": 0.6416024565696716,
258
+ "ref_logps/chosen": -51.760948181152344,
259
+ "ref_logps/rejected": -66.29093170166016,
260
+ "rewards/accuracies": 0.6260000467300415,
261
+ "rewards/chosen": -0.12587812542915344,
262
+ "rewards/margins": 0.22813528776168823,
263
+ "rewards/rejected": -0.3540134131908417,
264
+ "step": 375
265
+ },
266
+ {
267
+ "epoch": 1.0,
268
+ "learning_rate": 3.703703703703703e-07,
269
+ "logps/chosen": -53.866825103759766,
270
+ "logps/rejected": -73.0006332397461,
271
+ "loss": 0.6223,
272
+ "losses/dpo": 0.6281777024269104,
273
+ "losses/sft": 1.8359609842300415,
274
+ "losses/total": 0.6281777024269104,
275
+ "ref_logps/chosen": -52.29502868652344,
276
+ "ref_logps/rejected": -68.76205444335938,
277
+ "rewards/accuracies": 0.624500036239624,
278
+ "rewards/chosen": -0.15717901289463043,
279
+ "rewards/margins": 0.26667752861976624,
280
+ "rewards/rejected": -0.42385655641555786,
281
+ "step": 400
282
+ },
283
+ {
284
+ "epoch": 1.06,
285
+ "learning_rate": 3.587962962962963e-07,
286
+ "logps/chosen": -52.83345031738281,
287
+ "logps/rejected": -73.19969940185547,
288
+ "loss": 0.6044,
289
+ "losses/dpo": 0.6066713929176331,
290
+ "losses/sft": 1.8482356071472168,
291
+ "losses/total": 0.6066713929176331,
292
+ "ref_logps/chosen": -51.23924255371094,
293
+ "ref_logps/rejected": -68.46554565429688,
294
+ "rewards/accuracies": 0.658500075340271,
295
+ "rewards/chosen": -0.15942102670669556,
296
+ "rewards/margins": 0.31399470567703247,
297
+ "rewards/rejected": -0.4734157621860504,
298
+ "step": 425
299
+ },
300
+ {
301
+ "epoch": 1.12,
302
+ "learning_rate": 3.472222222222222e-07,
303
+ "logps/chosen": -53.00102996826172,
304
+ "logps/rejected": -71.68830871582031,
305
+ "loss": 0.6107,
306
+ "losses/dpo": 0.5940313935279846,
307
+ "losses/sft": 1.8907349109649658,
308
+ "losses/total": 0.5940313935279846,
309
+ "ref_logps/chosen": -51.17875671386719,
310
+ "ref_logps/rejected": -66.69224548339844,
311
+ "rewards/accuracies": 0.6355000138282776,
312
+ "rewards/chosen": -0.18222743272781372,
313
+ "rewards/margins": 0.3173789978027344,
314
+ "rewards/rejected": -0.4996064305305481,
315
+ "step": 450
316
+ },
317
+ {
318
+ "epoch": 1.19,
319
+ "learning_rate": 3.3564814814814815e-07,
320
+ "logps/chosen": -54.13747024536133,
321
+ "logps/rejected": -75.39285278320312,
322
+ "loss": 0.5906,
323
+ "losses/dpo": 0.6001964807510376,
324
+ "losses/sft": 1.837428331375122,
325
+ "losses/total": 0.6001964807510376,
326
+ "ref_logps/chosen": -52.013328552246094,
327
+ "ref_logps/rejected": -69.39274597167969,
328
+ "rewards/accuracies": 0.6559999585151672,
329
+ "rewards/chosen": -0.21241408586502075,
330
+ "rewards/margins": 0.387596994638443,
331
+ "rewards/rejected": -0.6000111103057861,
332
+ "step": 475
333
+ },
334
+ {
335
+ "epoch": 1.25,
336
+ "learning_rate": 3.2407407407407406e-07,
337
+ "logps/chosen": -52.84211730957031,
338
+ "logps/rejected": -73.44519805908203,
339
+ "loss": 0.588,
340
+ "losses/dpo": 0.5720361471176147,
341
+ "losses/sft": 1.82402765750885,
342
+ "losses/total": 0.5720361471176147,
343
+ "ref_logps/chosen": -50.860145568847656,
344
+ "ref_logps/rejected": -67.45731353759766,
345
+ "rewards/accuracies": 0.6580000519752502,
346
+ "rewards/chosen": -0.1981971114873886,
347
+ "rewards/margins": 0.40059128403663635,
348
+ "rewards/rejected": -0.5987883806228638,
349
+ "step": 500
350
+ },
351
+ {
352
+ "epoch": 1.31,
353
+ "learning_rate": 3.1249999999999997e-07,
354
+ "logps/chosen": -54.73566436767578,
355
+ "logps/rejected": -73.35672760009766,
356
+ "loss": 0.5864,
357
+ "losses/dpo": 0.5692346096038818,
358
+ "losses/sft": 1.8467037677764893,
359
+ "losses/total": 0.5692346096038818,
360
+ "ref_logps/chosen": -52.32421875,
361
+ "ref_logps/rejected": -66.84322357177734,
362
+ "rewards/accuracies": 0.6690000295639038,
363
+ "rewards/chosen": -0.24114486575126648,
364
+ "rewards/margins": 0.4102060794830322,
365
+ "rewards/rejected": -0.6513509154319763,
366
+ "step": 525
367
+ },
368
+ {
369
+ "epoch": 1.38,
370
+ "learning_rate": 3.0092592592592594e-07,
371
+ "logps/chosen": -54.00200653076172,
372
+ "logps/rejected": -77.5279312133789,
373
+ "loss": 0.5712,
374
+ "losses/dpo": 0.5749830007553101,
375
+ "losses/sft": 1.8555569648742676,
376
+ "losses/total": 0.5749830007553101,
377
+ "ref_logps/chosen": -51.46855163574219,
378
+ "ref_logps/rejected": -70.09744262695312,
379
+ "rewards/accuracies": 0.6720000505447388,
380
+ "rewards/chosen": -0.25334587693214417,
381
+ "rewards/margins": 0.48970410227775574,
382
+ "rewards/rejected": -0.7430499196052551,
383
+ "step": 550
384
+ },
385
+ {
386
+ "epoch": 1.44,
387
+ "learning_rate": 2.8935185185185185e-07,
388
+ "logps/chosen": -53.216957092285156,
389
+ "logps/rejected": -74.7680892944336,
390
+ "loss": 0.5712,
391
+ "losses/dpo": 0.5629280209541321,
392
+ "losses/sft": 1.8214166164398193,
393
+ "losses/total": 0.5629280209541321,
394
+ "ref_logps/chosen": -50.563720703125,
395
+ "ref_logps/rejected": -67.18008422851562,
396
+ "rewards/accuracies": 0.6755000352859497,
397
+ "rewards/chosen": -0.2653237581253052,
398
+ "rewards/margins": 0.4934755265712738,
399
+ "rewards/rejected": -0.7587993144989014,
400
+ "step": 575
401
+ },
402
+ {
403
+ "epoch": 1.5,
404
+ "learning_rate": 2.7777777777777776e-07,
405
+ "logps/chosen": -55.059600830078125,
406
+ "logps/rejected": -76.23825073242188,
407
+ "loss": 0.5577,
408
+ "losses/dpo": 0.5477968454360962,
409
+ "losses/sft": 1.9264967441558838,
410
+ "losses/total": 0.5477968454360962,
411
+ "ref_logps/chosen": -52.09434509277344,
412
+ "ref_logps/rejected": -67.781005859375,
413
+ "rewards/accuracies": 0.6880000233650208,
414
+ "rewards/chosen": -0.2965255081653595,
415
+ "rewards/margins": 0.5491979122161865,
416
+ "rewards/rejected": -0.8457233905792236,
417
+ "step": 600
418
+ },
419
+ {
420
+ "epoch": 1.56,
421
+ "learning_rate": 2.662037037037037e-07,
422
+ "logps/chosen": -56.8978271484375,
423
+ "logps/rejected": -78.7895736694336,
424
+ "loss": 0.5667,
425
+ "losses/dpo": 0.5594893097877502,
426
+ "losses/sft": 1.9116603136062622,
427
+ "losses/total": 0.5594893097877502,
428
+ "ref_logps/chosen": -53.51485824584961,
429
+ "ref_logps/rejected": -69.69821166992188,
430
+ "rewards/accuracies": 0.6595000624656677,
431
+ "rewards/chosen": -0.338296502828598,
432
+ "rewards/margins": 0.5708397030830383,
433
+ "rewards/rejected": -0.9091362953186035,
434
+ "step": 625
435
+ },
436
+ {
437
+ "epoch": 1.62,
438
+ "learning_rate": 2.5462962962962963e-07,
439
+ "logps/chosen": -55.704219818115234,
440
+ "logps/rejected": -77.06362915039062,
441
+ "loss": 0.5734,
442
+ "losses/dpo": 0.583986222743988,
443
+ "losses/sft": 1.8731328248977661,
444
+ "losses/total": 0.583986222743988,
445
+ "ref_logps/chosen": -52.17438888549805,
446
+ "ref_logps/rejected": -67.96855926513672,
447
+ "rewards/accuracies": 0.6725000143051147,
448
+ "rewards/chosen": -0.3529825508594513,
449
+ "rewards/margins": 0.5565236806869507,
450
+ "rewards/rejected": -0.9095062017440796,
451
+ "step": 650
452
+ },
453
+ {
454
+ "epoch": 1.69,
455
+ "learning_rate": 2.4305555555555555e-07,
456
+ "logps/chosen": -54.6390266418457,
457
+ "logps/rejected": -76.69291687011719,
458
+ "loss": 0.5568,
459
+ "losses/dpo": 0.5715891718864441,
460
+ "losses/sft": 1.8541003465652466,
461
+ "losses/total": 0.5715891718864441,
462
+ "ref_logps/chosen": -51.134674072265625,
463
+ "ref_logps/rejected": -67.13263702392578,
464
+ "rewards/accuracies": 0.687000036239624,
465
+ "rewards/chosen": -0.3504358232021332,
466
+ "rewards/margins": 0.605591893196106,
467
+ "rewards/rejected": -0.9560276865959167,
468
+ "step": 675
469
+ },
470
+ {
471
+ "epoch": 1.75,
472
+ "learning_rate": 2.3148148148148148e-07,
473
+ "logps/chosen": -54.50948715209961,
474
+ "logps/rejected": -77.46994018554688,
475
+ "loss": 0.5657,
476
+ "losses/dpo": 0.5799014568328857,
477
+ "losses/sft": 1.8863662481307983,
478
+ "losses/total": 0.5799014568328857,
479
+ "ref_logps/chosen": -50.88352966308594,
480
+ "ref_logps/rejected": -67.8268051147461,
481
+ "rewards/accuracies": 0.6755000352859497,
482
+ "rewards/chosen": -0.36259526014328003,
483
+ "rewards/margins": 0.6017180681228638,
484
+ "rewards/rejected": -0.9643133282661438,
485
+ "step": 700
486
+ },
487
+ {
488
+ "epoch": 1.81,
489
+ "learning_rate": 2.199074074074074e-07,
490
+ "logps/chosen": -55.129295349121094,
491
+ "logps/rejected": -77.04840087890625,
492
+ "loss": 0.5636,
493
+ "losses/dpo": 0.5747159123420715,
494
+ "losses/sft": 1.926343560218811,
495
+ "losses/total": 0.5747159123420715,
496
+ "ref_logps/chosen": -51.24678039550781,
497
+ "ref_logps/rejected": -67.05614471435547,
498
+ "rewards/accuracies": 0.6730000376701355,
499
+ "rewards/chosen": -0.3882514238357544,
500
+ "rewards/margins": 0.6109741926193237,
501
+ "rewards/rejected": -0.9992256760597229,
502
+ "step": 725
503
+ },
504
+ {
505
+ "epoch": 1.88,
506
+ "learning_rate": 2.0833333333333333e-07,
507
+ "logps/chosen": -55.22993087768555,
508
+ "logps/rejected": -78.04380798339844,
509
+ "loss": 0.5575,
510
+ "losses/dpo": 0.5746473073959351,
511
+ "losses/sft": 1.8473073244094849,
512
+ "losses/total": 0.5746473073959351,
513
+ "ref_logps/chosen": -51.49522399902344,
514
+ "ref_logps/rejected": -67.86705017089844,
515
+ "rewards/accuracies": 0.6645000576972961,
516
+ "rewards/chosen": -0.3734706938266754,
517
+ "rewards/margins": 0.6442059278488159,
518
+ "rewards/rejected": -1.0176767110824585,
519
+ "step": 750
520
+ },
521
+ {
522
+ "epoch": 1.94,
523
+ "learning_rate": 1.9675925925925927e-07,
524
+ "logps/chosen": -54.95744323730469,
525
+ "logps/rejected": -77.55332946777344,
526
+ "loss": 0.5428,
527
+ "losses/dpo": 0.5475051999092102,
528
+ "losses/sft": 1.8516862392425537,
529
+ "losses/total": 0.5475051999092102,
530
+ "ref_logps/chosen": -51.13042449951172,
531
+ "ref_logps/rejected": -66.90882110595703,
532
+ "rewards/accuracies": 0.6959999799728394,
533
+ "rewards/chosen": -0.382702112197876,
534
+ "rewards/margins": 0.6817485690116882,
535
+ "rewards/rejected": -1.064450740814209,
536
+ "step": 775
537
+ },
538
+ {
539
+ "epoch": 2.0,
540
+ "learning_rate": 1.8518518518518516e-07,
541
+ "logps/chosen": -56.311126708984375,
542
+ "logps/rejected": -79.54733276367188,
543
+ "loss": 0.5453,
544
+ "losses/dpo": 0.529507577419281,
545
+ "losses/sft": 1.9264029264450073,
546
+ "losses/total": 0.529507577419281,
547
+ "ref_logps/chosen": -51.99349594116211,
548
+ "ref_logps/rejected": -68.2928237915039,
549
+ "rewards/accuracies": 0.7039999961853027,
550
+ "rewards/chosen": -0.4317638874053955,
551
+ "rewards/margins": 0.6936854720115662,
552
+ "rewards/rejected": -1.1254491806030273,
553
+ "step": 800
554
+ },
555
+ {
556
+ "epoch": 2.06,
557
+ "learning_rate": 1.736111111111111e-07,
558
+ "logps/chosen": -55.88136672973633,
559
+ "logps/rejected": -78.89193725585938,
560
+ "loss": 0.5536,
561
+ "losses/dpo": 0.5027932524681091,
562
+ "losses/sft": 1.888191819190979,
563
+ "losses/total": 0.5027932524681091,
564
+ "ref_logps/chosen": -51.50459289550781,
565
+ "ref_logps/rejected": -67.58809661865234,
566
+ "rewards/accuracies": 0.7039999961853027,
567
+ "rewards/chosen": -0.43767789006233215,
568
+ "rewards/margins": 0.6927057504653931,
569
+ "rewards/rejected": -1.1303836107254028,
570
+ "step": 825
571
+ },
572
+ {
573
+ "epoch": 2.12,
574
+ "learning_rate": 1.6203703703703703e-07,
575
+ "logps/chosen": -56.851905822753906,
576
+ "logps/rejected": -79.84717559814453,
577
+ "loss": 0.5519,
578
+ "losses/dpo": 0.5627533197402954,
579
+ "losses/sft": 1.950637936592102,
580
+ "losses/total": 0.5627533197402954,
581
+ "ref_logps/chosen": -52.47712326049805,
582
+ "ref_logps/rejected": -68.6231918334961,
583
+ "rewards/accuracies": 0.6880000233650208,
584
+ "rewards/chosen": -0.4374789893627167,
585
+ "rewards/margins": 0.6849193572998047,
586
+ "rewards/rejected": -1.1223982572555542,
587
+ "step": 850
588
+ },
589
+ {
590
+ "epoch": 2.19,
591
+ "learning_rate": 1.5046296296296297e-07,
592
+ "logps/chosen": -56.792240142822266,
593
+ "logps/rejected": -80.210205078125,
594
+ "loss": 0.5433,
595
+ "losses/dpo": 0.562707245349884,
596
+ "losses/sft": 1.9036242961883545,
597
+ "losses/total": 0.562707245349884,
598
+ "ref_logps/chosen": -52.128883361816406,
599
+ "ref_logps/rejected": -68.35295104980469,
600
+ "rewards/accuracies": 0.6974999904632568,
601
+ "rewards/chosen": -0.46633559465408325,
602
+ "rewards/margins": 0.719389796257019,
603
+ "rewards/rejected": -1.185725450515747,
604
+ "step": 875
605
+ },
606
+ {
607
+ "epoch": 2.25,
608
+ "learning_rate": 1.3888888888888888e-07,
609
+ "logps/chosen": -56.366416931152344,
610
+ "logps/rejected": -81.07738494873047,
611
+ "loss": 0.5341,
612
+ "losses/dpo": 0.5471935868263245,
613
+ "losses/sft": 1.9060190916061401,
614
+ "losses/total": 0.5471935868263245,
615
+ "ref_logps/chosen": -51.77671432495117,
616
+ "ref_logps/rejected": -68.86495971679688,
617
+ "rewards/accuracies": 0.6914999485015869,
618
+ "rewards/chosen": -0.45896950364112854,
619
+ "rewards/margins": 0.7622722387313843,
620
+ "rewards/rejected": -1.2212417125701904,
621
+ "step": 900
622
+ },
623
+ {
624
+ "epoch": 2.31,
625
+ "learning_rate": 1.2731481481481482e-07,
626
+ "logps/chosen": -56.640018463134766,
627
+ "logps/rejected": -81.28205871582031,
628
+ "loss": 0.5195,
629
+ "losses/dpo": 0.5150865316390991,
630
+ "losses/sft": 1.918342113494873,
631
+ "losses/total": 0.5150865316390991,
632
+ "ref_logps/chosen": -52.07315444946289,
633
+ "ref_logps/rejected": -68.57891082763672,
634
+ "rewards/accuracies": 0.7275000214576721,
635
+ "rewards/chosen": -0.4566872715950012,
636
+ "rewards/margins": 0.8136279582977295,
637
+ "rewards/rejected": -1.2703152894973755,
638
+ "step": 925
639
+ },
640
+ {
641
+ "epoch": 2.38,
642
+ "learning_rate": 1.1574074074074074e-07,
643
+ "logps/chosen": -56.610965728759766,
644
+ "logps/rejected": -82.6984634399414,
645
+ "loss": 0.5267,
646
+ "losses/dpo": 0.5297408103942871,
647
+ "losses/sft": 1.9613755941390991,
648
+ "losses/total": 0.5297408103942871,
649
+ "ref_logps/chosen": -51.79991149902344,
650
+ "ref_logps/rejected": -69.79439544677734,
651
+ "rewards/accuracies": 0.7074999809265137,
652
+ "rewards/chosen": -0.4811055660247803,
653
+ "rewards/margins": 0.8093020915985107,
654
+ "rewards/rejected": -1.2904075384140015,
655
+ "step": 950
656
+ },
657
+ {
658
+ "epoch": 2.44,
659
+ "learning_rate": 1.0416666666666667e-07,
660
+ "logps/chosen": -55.42496109008789,
661
+ "logps/rejected": -80.11758422851562,
662
+ "loss": 0.5346,
663
+ "losses/dpo": 0.5188722610473633,
664
+ "losses/sft": 1.9635555744171143,
665
+ "losses/total": 0.5188722610473633,
666
+ "ref_logps/chosen": -50.690673828125,
667
+ "ref_logps/rejected": -67.66686248779297,
668
+ "rewards/accuracies": 0.7055000066757202,
669
+ "rewards/chosen": -0.47342926263809204,
670
+ "rewards/margins": 0.7716432213783264,
671
+ "rewards/rejected": -1.2450724840164185,
672
+ "step": 975
673
+ },
674
+ {
675
+ "epoch": 2.5,
676
+ "learning_rate": 9.259259259259258e-08,
677
+ "logps/chosen": -55.2717399597168,
678
+ "logps/rejected": -78.23765563964844,
679
+ "loss": 0.5358,
680
+ "losses/dpo": 0.522473156452179,
681
+ "losses/sft": 1.9380919933319092,
682
+ "losses/total": 0.522473156452179,
683
+ "ref_logps/chosen": -50.576473236083984,
684
+ "ref_logps/rejected": -65.8857192993164,
685
+ "rewards/accuracies": 0.6880000233650208,
686
+ "rewards/chosen": -0.46952569484710693,
687
+ "rewards/margins": 0.7656675577163696,
688
+ "rewards/rejected": -1.2351933717727661,
689
+ "step": 1000
690
+ },
691
+ {
692
+ "epoch": 2.56,
693
+ "learning_rate": 8.101851851851852e-08,
694
+ "logps/chosen": -57.238101959228516,
695
+ "logps/rejected": -80.15925598144531,
696
+ "loss": 0.5477,
697
+ "losses/dpo": 0.5454632043838501,
698
+ "losses/sft": 1.9586001634597778,
699
+ "losses/total": 0.5454632043838501,
700
+ "ref_logps/chosen": -51.92245101928711,
701
+ "ref_logps/rejected": -67.11357879638672,
702
+ "rewards/accuracies": 0.6919999718666077,
703
+ "rewards/chosen": -0.5315641164779663,
704
+ "rewards/margins": 0.7730042934417725,
705
+ "rewards/rejected": -1.3045685291290283,
706
+ "step": 1025
707
+ },
708
+ {
709
+ "epoch": 2.62,
710
+ "learning_rate": 6.944444444444444e-08,
711
+ "logps/chosen": -56.340274810791016,
712
+ "logps/rejected": -78.74750518798828,
713
+ "loss": 0.5353,
714
+ "losses/dpo": 0.564401388168335,
715
+ "losses/sft": 1.9238648414611816,
716
+ "losses/total": 0.564401388168335,
717
+ "ref_logps/chosen": -51.298805236816406,
718
+ "ref_logps/rejected": -66.08538818359375,
719
+ "rewards/accuracies": 0.7080000042915344,
720
+ "rewards/chosen": -0.5041469931602478,
721
+ "rewards/margins": 0.7620646953582764,
722
+ "rewards/rejected": -1.2662116289138794,
723
+ "step": 1050
724
+ },
725
+ {
726
+ "epoch": 2.69,
727
+ "learning_rate": 5.787037037037037e-08,
728
+ "logps/chosen": -58.54380416870117,
729
+ "logps/rejected": -82.85121154785156,
730
+ "loss": 0.5379,
731
+ "losses/dpo": 0.4951947033405304,
732
+ "losses/sft": 1.927669882774353,
733
+ "losses/total": 0.4951947033405304,
734
+ "ref_logps/chosen": -53.125526428222656,
735
+ "ref_logps/rejected": -69.30874633789062,
736
+ "rewards/accuracies": 0.699999988079071,
737
+ "rewards/chosen": -0.5418279767036438,
738
+ "rewards/margins": 0.8124186992645264,
739
+ "rewards/rejected": -1.3542466163635254,
740
+ "step": 1075
741
+ },
742
+ {
743
+ "epoch": 2.75,
744
+ "learning_rate": 4.629629629629629e-08,
745
+ "logps/chosen": -56.923858642578125,
746
+ "logps/rejected": -81.64384460449219,
747
+ "loss": 0.5352,
748
+ "losses/dpo": 0.5503857135772705,
749
+ "losses/sft": 1.9750890731811523,
750
+ "losses/total": 0.5503857135772705,
751
+ "ref_logps/chosen": -51.61204528808594,
752
+ "ref_logps/rejected": -68.07569122314453,
753
+ "rewards/accuracies": 0.7145000696182251,
754
+ "rewards/chosen": -0.5311816930770874,
755
+ "rewards/margins": 0.8256343603134155,
756
+ "rewards/rejected": -1.356816053390503,
757
+ "step": 1100
758
+ },
759
+ {
760
+ "epoch": 2.81,
761
+ "learning_rate": 3.472222222222222e-08,
762
+ "logps/chosen": -57.147186279296875,
763
+ "logps/rejected": -79.592529296875,
764
+ "loss": 0.541,
765
+ "losses/dpo": 0.5187271237373352,
766
+ "losses/sft": 1.9453308582305908,
767
+ "losses/total": 0.5187271237373352,
768
+ "ref_logps/chosen": -52.0300178527832,
769
+ "ref_logps/rejected": -66.7751693725586,
770
+ "rewards/accuracies": 0.7010000348091125,
771
+ "rewards/chosen": -0.5117166042327881,
772
+ "rewards/margins": 0.7700192332267761,
773
+ "rewards/rejected": -1.281735897064209,
774
+ "step": 1125
775
+ },
776
+ {
777
+ "epoch": 2.88,
778
+ "learning_rate": 2.3148148148148144e-08,
779
+ "logps/chosen": -56.04924011230469,
780
+ "logps/rejected": -82.78221893310547,
781
+ "loss": 0.5138,
782
+ "losses/dpo": 0.5014563202857971,
783
+ "losses/sft": 1.892021656036377,
784
+ "losses/total": 0.5014563202857971,
785
+ "ref_logps/chosen": -50.947792053222656,
786
+ "ref_logps/rejected": -68.86062622070312,
787
+ "rewards/accuracies": 0.7239999175071716,
788
+ "rewards/chosen": -0.5101450085639954,
789
+ "rewards/margins": 0.8820143938064575,
790
+ "rewards/rejected": -1.3921594619750977,
791
+ "step": 1150
792
+ },
793
+ {
794
+ "epoch": 2.94,
795
+ "learning_rate": 1.1574074074074072e-08,
796
+ "logps/chosen": -57.95354461669922,
797
+ "logps/rejected": -81.48481750488281,
798
+ "loss": 0.5322,
799
+ "losses/dpo": 0.5276296138763428,
800
+ "losses/sft": 1.914094090461731,
801
+ "losses/total": 0.5276296138763428,
802
+ "ref_logps/chosen": -52.49882507324219,
803
+ "ref_logps/rejected": -68.10453796386719,
804
+ "rewards/accuracies": 0.7064999938011169,
805
+ "rewards/chosen": -0.5454717874526978,
806
+ "rewards/margins": 0.7925567030906677,
807
+ "rewards/rejected": -1.3380285501480103,
808
+ "step": 1175
809
+ },
810
+ {
811
+ "epoch": 3.0,
812
+ "learning_rate": 0.0,
813
+ "logps/chosen": -58.058658599853516,
814
+ "logps/rejected": -80.37024688720703,
815
+ "loss": 0.5418,
816
+ "losses/dpo": 0.5331112146377563,
817
+ "losses/sft": 1.9555615186691284,
818
+ "losses/total": 0.5331112146377563,
819
+ "ref_logps/chosen": -52.748592376708984,
820
+ "ref_logps/rejected": -67.33932495117188,
821
+ "rewards/accuracies": 0.6899999976158142,
822
+ "rewards/chosen": -0.5310066342353821,
823
+ "rewards/margins": 0.7720865607261658,
824
+ "rewards/rejected": -1.3030930757522583,
825
+ "step": 1200
826
+ },
827
+ {
828
+ "epoch": 3.0,
829
+ "step": 1200,
830
+ "total_flos": 0.0,
831
+ "train_loss": 0.5943309187889099,
832
+ "train_runtime": 16038.0463,
833
+ "train_samples_per_second": 5.984,
834
+ "train_steps_per_second": 0.075
835
+ }
836
+ ],
837
+ "logging_steps": 25,
838
+ "max_steps": 1200,
839
+ "num_input_tokens_seen": 0,
840
+ "num_train_epochs": 3,
841
+ "save_steps": 500,
842
+ "stateful_callbacks": {},
843
+ "total_flos": 0.0,
844
+ "train_batch_size": 4,
845
+ "trial_name": null,
846
+ "trial_params": null
847
+ }