osawar51 committed on
Commit
fe1f869
·
verified ·
1 Parent(s): 4af330f

End of training

Browse files
Files changed (4) hide show
  1. all_results.json +4 -4
  2. model.safetensors +1 -1
  3. train_results.json +4 -4
  4. trainer_state.json +108 -108
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 0.0,
3
- "train_loss": 6.094016134738922e-06,
4
- "train_runtime": 219.6957,
5
  "train_samples": 28,
6
- "train_samples_per_second": 1.457,
7
- "train_steps_per_second": 0.091
8
  }
 
1
  {
2
  "total_flos": 0.0,
3
+ "train_loss": 9.198513089359039e-06,
4
+ "train_runtime": 215.0623,
5
  "train_samples": 28,
6
+ "train_samples_per_second": 1.488,
7
+ "train_steps_per_second": 0.093
8
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:72404ef325bc370797abf84fc71a74b94fcac3f316939ae72fa8a470b1bef6c5
3
  size 1976163472
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9ef080e08d6d7b7bb62db78a3ca124283306718c648f0c05a148bc0e8d6f7ae
3
  size 1976163472
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 0.0,
3
- "train_loss": 6.094016134738922e-06,
4
- "train_runtime": 219.6957,
5
  "train_samples": 28,
6
- "train_samples_per_second": 1.457,
7
- "train_steps_per_second": 0.091
8
  }
 
1
  {
2
  "total_flos": 0.0,
3
+ "train_loss": 9.198513089359039e-06,
4
+ "train_runtime": 215.0623,
5
  "train_samples": 28,
6
+ "train_samples_per_second": 1.488,
7
+ "train_steps_per_second": 0.093
8
  }
trainer_state.json CHANGED
@@ -10,203 +10,203 @@
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
- "completion_length": 344.65625,
14
  "epoch": 0.5714285714285714,
15
- "grad_norm": 5.017642974853516,
16
  "kl": 0.0,
17
  "learning_rate": 5e-07,
18
- "loss": -0.0,
19
- "reward": 1.6484445855021477,
20
- "reward_std": 0.9143233702052385,
21
- "rewards/concensus_correctness_reward_func": 0.15193749591708183,
22
- "rewards/consensus_reward_func": 0.3125,
23
  "rewards/cumulative_reward_2": 0.0,
24
  "rewards/final_correctness_reward_func": 0.0,
25
- "rewards/question_recreation_reward_func": 0.4839133294299245,
26
  "rewards/soft_format_reward_func": 0.0,
27
- "rewards/strict_format_reward_func": 0.0,
28
- "rewards/xmlcount_reward_func": 0.7000937471166253,
29
  "step": 2
30
  },
31
  {
32
- "completion_length": 294.5416666666667,
33
  "epoch": 1.0,
34
- "grad_norm": 3.5389950275421143,
35
- "kl": 0.0013586016696838972,
36
  "learning_rate": 4.864543104251586e-07,
37
  "loss": 0.0,
38
- "reward": 2.2336954176425934,
39
- "reward_std": 1.1887832844319444,
40
- "rewards/concensus_correctness_reward_func": 0.34041666736205417,
41
- "rewards/consensus_reward_func": 0.25,
42
  "rewards/cumulative_reward_2": 0.0,
43
- "rewards/final_correctness_reward_func": 0.08333333333333333,
44
- "rewards/question_recreation_reward_func": 0.638487059623003,
45
  "rewards/soft_format_reward_func": 0.0,
46
- "rewards/strict_format_reward_func": 0.020833333333333332,
47
- "rewards/xmlcount_reward_func": 0.900625005364418,
48
  "step": 4
49
  },
50
  {
51
- "completion_length": 321.46875,
52
  "epoch": 1.5714285714285714,
53
- "grad_norm": 5.161291122436523,
54
- "kl": 0.0034227508440380916,
55
  "learning_rate": 4.472851273490984e-07,
56
  "loss": 0.0,
57
- "reward": 1.9783577173948288,
58
- "reward_std": 1.2956023588776588,
59
- "rewards/concensus_correctness_reward_func": 0.30268749594688416,
60
- "rewards/consensus_reward_func": 0.3125,
61
  "rewards/cumulative_reward_2": 0.0,
62
  "rewards/final_correctness_reward_func": 0.0625,
63
- "rewards/question_recreation_reward_func": 0.632795225828886,
64
  "rewards/soft_format_reward_func": 0.0,
65
- "rewards/strict_format_reward_func": 0.0,
66
- "rewards/xmlcount_reward_func": 0.6678749807178974,
67
  "step": 6
68
  },
69
  {
70
- "completion_length": 281.6666666666667,
71
  "epoch": 2.0,
72
- "grad_norm": 3.7111079692840576,
73
- "kl": 0.004361502108319352,
74
  "learning_rate": 3.867370395306068e-07,
75
  "loss": 0.0,
76
- "reward": 2.383881355325381,
77
- "reward_std": 0.5245453078920642,
78
- "rewards/concensus_correctness_reward_func": 0.26100000242392224,
79
- "rewards/consensus_reward_func": 0.4166666666666667,
80
  "rewards/cumulative_reward_2": 0.0,
81
  "rewards/final_correctness_reward_func": 0.0,
82
- "rewards/question_recreation_reward_func": 0.642589658498764,
83
  "rewards/soft_format_reward_func": 0.0,
84
- "rewards/strict_format_reward_func": 0.0625,
85
- "rewards/xmlcount_reward_func": 1.0011249979337056,
86
  "step": 8
87
  },
88
  {
89
- "completion_length": 301.5625,
90
  "epoch": 2.571428571428571,
91
- "grad_norm": 4.941656589508057,
92
- "kl": 0.006099865640862845,
93
  "learning_rate": 3.1137137178519977e-07,
94
  "loss": 0.0,
95
- "reward": 2.2924628891050816,
96
- "reward_std": 0.7776006847198005,
97
- "rewards/concensus_correctness_reward_func": 0.34712499752640724,
98
- "rewards/consensus_reward_func": 0.375,
99
  "rewards/cumulative_reward_2": 0.0,
100
- "rewards/final_correctness_reward_func": 0.125,
101
- "rewards/question_recreation_reward_func": 0.5014628782519139,
102
  "rewards/soft_format_reward_func": 0.0,
103
- "rewards/strict_format_reward_func": 0.015625,
104
- "rewards/xmlcount_reward_func": 0.9282500073313713,
105
  "step": 10
106
  },
107
  {
108
- "completion_length": 199.0,
109
  "epoch": 3.0,
110
- "grad_norm": 4.053720951080322,
111
- "kl": 0.007232943249012654,
112
  "learning_rate": 2.2935516363191693e-07,
113
  "loss": 0.0,
114
- "reward": 2.1971174677213035,
115
- "reward_std": 0.7274505247720905,
116
- "rewards/concensus_correctness_reward_func": 0.17933333168427149,
117
- "rewards/consensus_reward_func": 0.3333333333333333,
118
  "rewards/cumulative_reward_2": 0.0,
119
  "rewards/final_correctness_reward_func": 0.0,
120
- "rewards/question_recreation_reward_func": 0.736742460479339,
121
  "rewards/soft_format_reward_func": 0.0,
122
- "rewards/strict_format_reward_func": 0.0,
123
- "rewards/xmlcount_reward_func": 0.9477083285649618,
124
  "step": 12
125
  },
126
  {
127
- "completion_length": 264.28125,
128
  "epoch": 3.571428571428571,
129
- "grad_norm": 14.958691596984863,
130
- "kl": 0.008564002040657215,
131
  "learning_rate": 1.4957614383675767e-07,
132
  "loss": 0.0,
133
- "reward": 2.5280247405171394,
134
- "reward_std": 1.2076747512328438,
135
- "rewards/concensus_correctness_reward_func": 0.32162499986588955,
136
- "rewards/consensus_reward_func": 0.3125,
137
  "rewards/cumulative_reward_2": 0.0,
138
- "rewards/final_correctness_reward_func": 0.0625,
139
- "rewards/question_recreation_reward_func": 0.6926184613257647,
140
  "rewards/soft_format_reward_func": 0.0,
141
- "rewards/strict_format_reward_func": 0.046875,
142
- "rewards/xmlcount_reward_func": 1.0919062793254852,
143
  "step": 14
144
  },
145
  {
146
- "completion_length": 256.0,
147
  "epoch": 4.0,
148
- "grad_norm": 3.648608446121216,
149
- "kl": 0.008650898894605538,
150
  "learning_rate": 8.067960709356478e-08,
151
  "loss": 0.0,
152
- "reward": 2.4574286540349326,
153
- "reward_std": 0.6175319347530603,
154
- "rewards/concensus_correctness_reward_func": 0.32233334084351856,
155
  "rewards/consensus_reward_func": 0.4166666666666667,
156
  "rewards/cumulative_reward_2": 0.0,
157
- "rewards/final_correctness_reward_func": 0.16666666666666666,
158
- "rewards/question_recreation_reward_func": 0.544261984527111,
159
  "rewards/soft_format_reward_func": 0.0,
160
- "rewards/strict_format_reward_func": 0.0625,
161
- "rewards/xmlcount_reward_func": 0.9449999928474426,
162
  "step": 16
163
  },
164
  {
165
- "completion_length": 221.4375,
166
  "epoch": 4.571428571428571,
167
- "grad_norm": 5.220948696136475,
168
- "kl": 0.010831554245669395,
169
  "learning_rate": 3.013156219837776e-08,
170
  "loss": 0.0,
171
- "reward": 2.395220074802637,
172
- "reward_std": 0.6563976746983826,
173
- "rewards/concensus_correctness_reward_func": 0.3020000010728836,
174
- "rewards/consensus_reward_func": 0.3125,
175
  "rewards/cumulative_reward_2": 0.0,
176
  "rewards/final_correctness_reward_func": 0.125,
177
- "rewards/question_recreation_reward_func": 0.6267513148486614,
178
  "rewards/soft_format_reward_func": 0.0,
179
- "rewards/strict_format_reward_func": 0.0625,
180
- "rewards/xmlcount_reward_func": 0.9664687551558018,
181
  "step": 18
182
  },
183
  {
184
- "completion_length": 255.0,
185
  "epoch": 5.0,
186
- "grad_norm": 3.633751630783081,
187
- "kl": 0.009951268167545399,
188
  "learning_rate": 3.4096741493194193e-09,
189
  "loss": 0.0,
190
- "reward": 1.8240316808223724,
191
- "reward_std": 0.2700964094450076,
192
- "rewards/concensus_correctness_reward_func": 0.04483333230018616,
193
- "rewards/consensus_reward_func": 0.16666666666666666,
194
  "rewards/cumulative_reward_2": 0.0,
195
- "rewards/final_correctness_reward_func": 0.0,
196
- "rewards/question_recreation_reward_func": 0.5966150037323436,
197
  "rewards/soft_format_reward_func": 0.0,
198
- "rewards/strict_format_reward_func": 0.041666666666666664,
199
- "rewards/xmlcount_reward_func": 0.9742500086625417,
200
  "step": 20
201
  },
202
  {
203
  "epoch": 5.0,
204
  "step": 20,
205
  "total_flos": 0.0,
206
- "train_loss": 6.094016134738922e-06,
207
- "train_runtime": 219.6957,
208
- "train_samples_per_second": 1.457,
209
- "train_steps_per_second": 0.091
210
  }
211
  ],
212
  "logging_steps": 2,
 
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
+ "completion_length": 298.0625,
14
  "epoch": 0.5714285714285714,
15
+ "grad_norm": 6.717251777648926,
16
  "kl": 0.0,
17
  "learning_rate": 5e-07,
18
+ "loss": 0.0,
19
+ "reward": 2.065369065850973,
20
+ "reward_std": 1.6619373487774283,
21
+ "rewards/concensus_correctness_reward_func": 0.39149999618530273,
22
+ "rewards/consensus_reward_func": 0.5,
23
  "rewards/cumulative_reward_2": 0.0,
24
  "rewards/final_correctness_reward_func": 0.0,
25
+ "rewards/question_recreation_reward_func": 0.45577534078620374,
26
  "rewards/soft_format_reward_func": 0.0,
27
+ "rewards/strict_format_reward_func": 0.03125,
28
+ "rewards/xmlcount_reward_func": 0.6868437463417649,
29
  "step": 2
30
  },
31
  {
32
+ "completion_length": 266.2916666666667,
33
  "epoch": 1.0,
34
+ "grad_norm": 3.6867785453796387,
35
+ "kl": 0.001406152257307743,
36
  "learning_rate": 4.864543104251586e-07,
37
  "loss": 0.0,
38
+ "reward": 2.4803318232297897,
39
+ "reward_std": 1.9100091128299634,
40
+ "rewards/concensus_correctness_reward_func": 0.5114166711767515,
41
+ "rewards/consensus_reward_func": 0.4166666666666667,
42
  "rewards/cumulative_reward_2": 0.0,
43
+ "rewards/final_correctness_reward_func": 0.25,
44
+ "rewards/question_recreation_reward_func": 0.49745685296754044,
45
  "rewards/soft_format_reward_func": 0.0,
46
+ "rewards/strict_format_reward_func": 0.0,
47
+ "rewards/xmlcount_reward_func": 0.8047916715343794,
48
  "step": 4
49
  },
50
  {
51
+ "completion_length": 204.0625,
52
  "epoch": 1.5714285714285714,
53
+ "grad_norm": 8.32531452178955,
54
+ "kl": 0.011501419357955456,
55
  "learning_rate": 4.472851273490984e-07,
56
  "loss": 0.0,
57
+ "reward": 2.601933002471924,
58
+ "reward_std": 0.9620060075540096,
59
+ "rewards/concensus_correctness_reward_func": 0.4566875025629997,
60
+ "rewards/consensus_reward_func": 0.4375,
61
  "rewards/cumulative_reward_2": 0.0,
62
  "rewards/final_correctness_reward_func": 0.0625,
63
+ "rewards/question_recreation_reward_func": 0.5837454923894256,
64
  "rewards/soft_format_reward_func": 0.0,
65
+ "rewards/strict_format_reward_func": 0.09375,
66
+ "rewards/xmlcount_reward_func": 0.9677500203251839,
67
  "step": 6
68
  },
69
  {
70
+ "completion_length": 227.25,
71
  "epoch": 2.0,
72
+ "grad_norm": 4.771548271179199,
73
+ "kl": 0.006304021313553676,
74
  "learning_rate": 3.867370395306068e-07,
75
  "loss": 0.0,
76
+ "reward": 3.030232419570287,
77
+ "reward_std": 1.8522576310982306,
78
+ "rewards/concensus_correctness_reward_func": 0.7245000004768372,
79
+ "rewards/consensus_reward_func": 0.75,
80
  "rewards/cumulative_reward_2": 0.0,
81
  "rewards/final_correctness_reward_func": 0.0,
82
+ "rewards/question_recreation_reward_func": 0.6421075016260147,
83
  "rewards/soft_format_reward_func": 0.0,
84
+ "rewards/strict_format_reward_func": 0.020833333333333332,
85
+ "rewards/xmlcount_reward_func": 0.892791673541069,
86
  "step": 8
87
  },
88
  {
89
+ "completion_length": 249.625,
90
  "epoch": 2.571428571428571,
91
+ "grad_norm": 6.5946149826049805,
92
+ "kl": 0.011220796084671747,
93
  "learning_rate": 3.1137137178519977e-07,
94
  "loss": 0.0,
95
+ "reward": 2.330574309453368,
96
+ "reward_std": 0.9070390482665971,
97
+ "rewards/concensus_correctness_reward_func": 0.3594375029206276,
98
+ "rewards/consensus_reward_func": 0.3125,
99
  "rewards/cumulative_reward_2": 0.0,
100
+ "rewards/final_correctness_reward_func": 0.1875,
101
+ "rewards/question_recreation_reward_func": 0.42963684926507995,
102
  "rewards/soft_format_reward_func": 0.0,
103
+ "rewards/strict_format_reward_func": 0.046875,
104
+ "rewards/xmlcount_reward_func": 0.9946250058710575,
105
  "step": 10
106
  },
107
  {
108
+ "completion_length": 284.0833333333333,
109
  "epoch": 3.0,
110
+ "grad_norm": 8.51677131652832,
111
+ "kl": 0.01575400565828507,
112
  "learning_rate": 2.2935516363191693e-07,
113
  "loss": 0.0,
114
+ "reward": 2.885760868589083,
115
+ "reward_std": 1.9988142798344295,
116
+ "rewards/concensus_correctness_reward_func": 0.710416667163372,
117
+ "rewards/consensus_reward_func": 0.75,
118
  "rewards/cumulative_reward_2": 0.0,
119
  "rewards/final_correctness_reward_func": 0.0,
120
+ "rewards/question_recreation_reward_func": 0.6493442542850971,
121
  "rewards/soft_format_reward_func": 0.0,
122
+ "rewards/strict_format_reward_func": 0.041666666666666664,
123
+ "rewards/xmlcount_reward_func": 0.7343333313862482,
124
  "step": 12
125
  },
126
  {
127
+ "completion_length": 287.21875,
128
  "epoch": 3.571428571428571,
129
+ "grad_norm": 5.919245719909668,
130
+ "kl": 0.01331145316362381,
131
  "learning_rate": 1.4957614383675767e-07,
132
  "loss": 0.0,
133
+ "reward": 2.9632712304592133,
134
+ "reward_std": 1.4405226800590754,
135
+ "rewards/concensus_correctness_reward_func": 0.756249999627471,
136
+ "rewards/consensus_reward_func": 0.75,
137
  "rewards/cumulative_reward_2": 0.0,
138
+ "rewards/final_correctness_reward_func": 0.0,
139
+ "rewards/question_recreation_reward_func": 0.6578337242826819,
140
  "rewards/soft_format_reward_func": 0.0,
141
+ "rewards/strict_format_reward_func": 0.015625,
142
+ "rewards/xmlcount_reward_func": 0.7835625000298023,
143
  "step": 14
144
  },
145
  {
146
+ "completion_length": 290.2083333333333,
147
  "epoch": 4.0,
148
+ "grad_norm": 5.046748161315918,
149
+ "kl": 0.023930478307496134,
150
  "learning_rate": 8.067960709356478e-08,
151
  "loss": 0.0,
152
+ "reward": 2.493785634636879,
153
+ "reward_std": 0.5378106032779518,
154
+ "rewards/concensus_correctness_reward_func": 0.34049999713897705,
155
  "rewards/consensus_reward_func": 0.4166666666666667,
156
  "rewards/cumulative_reward_2": 0.0,
157
+ "rewards/final_correctness_reward_func": 0.25,
158
+ "rewards/question_recreation_reward_func": 0.5334939612851789,
159
  "rewards/soft_format_reward_func": 0.0,
160
+ "rewards/strict_format_reward_func": 0.041666666666666664,
161
+ "rewards/xmlcount_reward_func": 0.9114583308498064,
162
  "step": 16
163
  },
164
  {
165
+ "completion_length": 294.375,
166
  "epoch": 4.571428571428571,
167
+ "grad_norm": 7.338254928588867,
168
+ "kl": 0.016075315224952647,
169
  "learning_rate": 3.013156219837776e-08,
170
  "loss": 0.0,
171
+ "reward": 2.63186077773571,
172
+ "reward_std": 0.7248760172806215,
173
+ "rewards/concensus_correctness_reward_func": 0.4661874994635582,
174
+ "rewards/consensus_reward_func": 0.5,
175
  "rewards/cumulative_reward_2": 0.0,
176
  "rewards/final_correctness_reward_func": 0.125,
177
+ "rewards/question_recreation_reward_func": 0.6226419649028685,
178
  "rewards/soft_format_reward_func": 0.0,
179
+ "rewards/strict_format_reward_func": 0.046875,
180
+ "rewards/xmlcount_reward_func": 0.8711562715470791,
181
  "step": 18
182
  },
183
  {
184
+ "completion_length": 286.5416666666667,
185
  "epoch": 5.0,
186
+ "grad_norm": 5.04390811920166,
187
+ "kl": 0.017868785148796935,
188
  "learning_rate": 3.4096741493194193e-09,
189
  "loss": 0.0,
190
+ "reward": 2.4256584346294403,
191
+ "reward_std": 1.272670385427773,
192
+ "rewards/concensus_correctness_reward_func": 0.4596666644016902,
193
+ "rewards/consensus_reward_func": 0.4166666666666667,
194
  "rewards/cumulative_reward_2": 0.0,
195
+ "rewards/final_correctness_reward_func": 0.08333333333333333,
196
+ "rewards/question_recreation_reward_func": 0.5910334512591362,
197
  "rewards/soft_format_reward_func": 0.0,
198
+ "rewards/strict_format_reward_func": 0.0,
199
+ "rewards/xmlcount_reward_func": 0.8749583264191946,
200
  "step": 20
201
  },
202
  {
203
  "epoch": 5.0,
204
  "step": 20,
205
  "total_flos": 0.0,
206
+ "train_loss": 9.198513089359039e-06,
207
+ "train_runtime": 215.0623,
208
+ "train_samples_per_second": 1.488,
209
+ "train_steps_per_second": 0.093
210
  }
211
  ],
212
  "logging_steps": 2,