Iedha commited on
Commit
1a9617c
·
verified ·
1 Parent(s): 41c6394

End of training

Browse files
Files changed (4) hide show
  1. all_results.json +5 -5
  2. model.safetensors +1 -1
  3. train_results.json +5 -5
  4. trainer_state.json +107 -107
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 0.0,
3
- "train_loss": 0.0,
4
- "train_runtime": 1040.7425,
5
- "train_samples": 2,
6
- "train_samples_per_second": 0.038,
7
- "train_steps_per_second": 0.01
8
  }
 
1
  {
2
  "total_flos": 0.0,
3
+ "train_loss": -0.01780639439821243,
4
+ "train_runtime": 1859.9628,
5
+ "train_samples": 41,
6
+ "train_samples_per_second": 0.022,
7
+ "train_steps_per_second": 0.005
8
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:50ff9b6b649894c959c15a8db61291438695eb00e2bce04995391923a10370f5
3
  size 1976163472
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:286fdb3e49aa32388e88e4345400cdc49ec2dfb94e3383b066d15d273eb6e95d
3
  size 1976163472
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 0.0,
3
- "train_loss": 0.0,
4
- "train_runtime": 1040.7425,
5
- "train_samples": 2,
6
- "train_samples_per_second": 0.038,
7
- "train_steps_per_second": 0.01
8
  }
 
1
  {
2
  "total_flos": 0.0,
3
+ "train_loss": -0.01780639439821243,
4
+ "train_runtime": 1859.9628,
5
+ "train_samples": 41,
6
+ "train_samples_per_second": 0.022,
7
+ "train_steps_per_second": 0.005
8
  }
trainer_state.json CHANGED
@@ -2,7 +2,7 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 10.0,
6
  "eval_steps": 500,
7
  "global_step": 10,
8
  "is_hyper_param_search": false,
@@ -16,37 +16,37 @@
16
  "clip_ratio/low_min": 0.0,
17
  "clip_ratio/region_mean": 0.0,
18
  "completions/clipped_ratio": 0.0,
19
- "completions/max_length": 14.0,
20
- "completions/max_terminated_length": 14.0,
21
- "completions/mean_length": 14.0,
22
- "completions/mean_terminated_length": 14.0,
23
- "completions/min_length": 14.0,
24
- "completions/min_terminated_length": 14.0,
25
- "epoch": 2.0,
26
- "frac_reward_zero_std": 1.0,
27
- "grad_norm": 0.0,
28
  "kl": 0.0,
29
  "learning_rate": 5e-07,
30
- "loss": 0.0,
31
- "num_tokens": 1136.0,
32
- "reward": 1.2776422500610352,
33
- "reward_std": 0.0,
34
  "rewards/concensus_correctness_reward_func/mean": 0.0,
35
  "rewards/concensus_correctness_reward_func/std": 0.0,
36
  "rewards/consensus_reward_func/mean": 0.0,
37
  "rewards/consensus_reward_func/std": 0.0,
38
  "rewards/cumulative_reward_2/mean": 0.0,
39
  "rewards/cumulative_reward_2/std": 0.0,
40
- "rewards/final_correctness_reward_func/mean": 1.0,
41
- "rewards/final_correctness_reward_func/std": 1.154700517654419,
42
- "rewards/question_recreation_reward_func/mean": 0.027642276138067245,
43
- "rewards/question_recreation_reward_func/std": 0.0,
44
  "rewards/soft_format_reward_func/mean": 0.0,
45
  "rewards/soft_format_reward_func/std": 0.0,
46
  "rewards/strict_format_reward_func/mean": 0.0,
47
  "rewards/strict_format_reward_func/std": 0.0,
48
- "rewards/xmlcount_reward_func/mean": 0.25,
49
- "rewards/xmlcount_reward_func/std": 0.0,
50
  "step": 2
51
  },
52
  {
@@ -56,37 +56,37 @@
56
  "clip_ratio/low_min": 0.0,
57
  "clip_ratio/region_mean": 0.0,
58
  "completions/clipped_ratio": 0.0,
59
- "completions/max_length": 14.0,
60
- "completions/max_terminated_length": 14.0,
61
- "completions/mean_length": 14.0,
62
- "completions/mean_terminated_length": 14.0,
63
- "completions/min_length": 14.0,
64
- "completions/min_terminated_length": 14.0,
65
- "epoch": 4.0,
66
- "frac_reward_zero_std": 1.0,
67
- "grad_norm": 0.0,
68
- "kl": 0.0,
69
  "learning_rate": 4.415111107797445e-07,
70
- "loss": 0.0,
71
- "num_tokens": 2272.0,
72
- "reward": 1.2890243530273438,
73
- "reward_std": 0.0,
74
  "rewards/concensus_correctness_reward_func/mean": 0.0,
75
  "rewards/concensus_correctness_reward_func/std": 0.0,
76
  "rewards/consensus_reward_func/mean": 0.0,
77
  "rewards/consensus_reward_func/std": 0.0,
78
  "rewards/cumulative_reward_2/mean": 0.0,
79
  "rewards/cumulative_reward_2/std": 0.0,
80
- "rewards/final_correctness_reward_func/mean": 1.0,
81
- "rewards/final_correctness_reward_func/std": 1.154700517654419,
82
- "rewards/question_recreation_reward_func/mean": 0.039024390280246735,
83
- "rewards/question_recreation_reward_func/std": 0.0,
84
  "rewards/soft_format_reward_func/mean": 0.0,
85
  "rewards/soft_format_reward_func/std": 0.0,
86
  "rewards/strict_format_reward_func/mean": 0.0,
87
  "rewards/strict_format_reward_func/std": 0.0,
88
- "rewards/xmlcount_reward_func/mean": 0.25,
89
- "rewards/xmlcount_reward_func/std": 0.0,
90
  "step": 4
91
  },
92
  {
@@ -96,37 +96,37 @@
96
  "clip_ratio/low_min": 0.0,
97
  "clip_ratio/region_mean": 0.0,
98
  "completions/clipped_ratio": 0.0,
99
- "completions/max_length": 14.0,
100
- "completions/max_terminated_length": 14.0,
101
- "completions/mean_length": 14.0,
102
- "completions/mean_terminated_length": 14.0,
103
- "completions/min_length": 14.0,
104
- "completions/min_terminated_length": 14.0,
105
- "epoch": 6.0,
106
- "frac_reward_zero_std": 1.0,
107
- "grad_norm": 0.0,
108
- "kl": 0.0,
109
  "learning_rate": 2.934120444167326e-07,
110
- "loss": 0.0,
111
- "num_tokens": 3408.0,
112
- "reward": 1.2776422500610352,
113
- "reward_std": 0.0,
114
  "rewards/concensus_correctness_reward_func/mean": 0.0,
115
  "rewards/concensus_correctness_reward_func/std": 0.0,
116
  "rewards/consensus_reward_func/mean": 0.0,
117
  "rewards/consensus_reward_func/std": 0.0,
118
  "rewards/cumulative_reward_2/mean": 0.0,
119
  "rewards/cumulative_reward_2/std": 0.0,
120
- "rewards/final_correctness_reward_func/mean": 1.0,
121
- "rewards/final_correctness_reward_func/std": 1.154700517654419,
122
- "rewards/question_recreation_reward_func/mean": 0.027642276138067245,
123
- "rewards/question_recreation_reward_func/std": 0.0,
124
  "rewards/soft_format_reward_func/mean": 0.0,
125
  "rewards/soft_format_reward_func/std": 0.0,
126
  "rewards/strict_format_reward_func/mean": 0.0,
127
  "rewards/strict_format_reward_func/std": 0.0,
128
- "rewards/xmlcount_reward_func/mean": 0.25,
129
- "rewards/xmlcount_reward_func/std": 0.0,
130
  "step": 6
131
  },
132
  {
@@ -136,37 +136,37 @@
136
  "clip_ratio/low_min": 0.0,
137
  "clip_ratio/region_mean": 0.0,
138
  "completions/clipped_ratio": 0.0,
139
- "completions/max_length": 14.0,
140
- "completions/max_terminated_length": 14.0,
141
- "completions/mean_length": 14.0,
142
- "completions/mean_terminated_length": 14.0,
143
- "completions/min_length": 14.0,
144
- "completions/min_terminated_length": 14.0,
145
- "epoch": 8.0,
146
- "frac_reward_zero_std": 1.0,
147
- "grad_norm": 0.0,
148
- "kl": 0.0,
149
  "learning_rate": 1.2500000000000005e-07,
150
- "loss": 0.0,
151
- "num_tokens": 4544.0,
152
- "reward": 1.2890243530273438,
153
- "reward_std": 0.0,
154
  "rewards/concensus_correctness_reward_func/mean": 0.0,
155
  "rewards/concensus_correctness_reward_func/std": 0.0,
156
  "rewards/consensus_reward_func/mean": 0.0,
157
  "rewards/consensus_reward_func/std": 0.0,
158
  "rewards/cumulative_reward_2/mean": 0.0,
159
  "rewards/cumulative_reward_2/std": 0.0,
160
- "rewards/final_correctness_reward_func/mean": 1.0,
161
- "rewards/final_correctness_reward_func/std": 1.154700517654419,
162
- "rewards/question_recreation_reward_func/mean": 0.039024390280246735,
163
- "rewards/question_recreation_reward_func/std": 0.0,
164
  "rewards/soft_format_reward_func/mean": 0.0,
165
  "rewards/soft_format_reward_func/std": 0.0,
166
  "rewards/strict_format_reward_func/mean": 0.0,
167
  "rewards/strict_format_reward_func/std": 0.0,
168
- "rewards/xmlcount_reward_func/mean": 0.25,
169
- "rewards/xmlcount_reward_func/std": 0.0,
170
  "step": 8
171
  },
172
  {
@@ -176,53 +176,53 @@
176
  "clip_ratio/low_min": 0.0,
177
  "clip_ratio/region_mean": 0.0,
178
  "completions/clipped_ratio": 0.0,
179
- "completions/max_length": 14.0,
180
- "completions/max_terminated_length": 14.0,
181
- "completions/mean_length": 14.0,
182
- "completions/mean_terminated_length": 14.0,
183
- "completions/min_length": 14.0,
184
- "completions/min_terminated_length": 14.0,
185
- "epoch": 10.0,
186
- "frac_reward_zero_std": 1.0,
187
- "grad_norm": 0.0,
188
- "kl": 0.0,
189
  "learning_rate": 1.507684480352292e-08,
190
- "loss": 0.0,
191
- "num_tokens": 5680.0,
192
- "reward": 1.2776422500610352,
193
- "reward_std": 0.0,
194
  "rewards/concensus_correctness_reward_func/mean": 0.0,
195
  "rewards/concensus_correctness_reward_func/std": 0.0,
196
  "rewards/consensus_reward_func/mean": 0.0,
197
  "rewards/consensus_reward_func/std": 0.0,
198
  "rewards/cumulative_reward_2/mean": 0.0,
199
  "rewards/cumulative_reward_2/std": 0.0,
200
- "rewards/final_correctness_reward_func/mean": 1.0,
201
- "rewards/final_correctness_reward_func/std": 1.154700517654419,
202
- "rewards/question_recreation_reward_func/mean": 0.027642276138067245,
203
- "rewards/question_recreation_reward_func/std": 0.0,
204
  "rewards/soft_format_reward_func/mean": 0.0,
205
  "rewards/soft_format_reward_func/std": 0.0,
206
  "rewards/strict_format_reward_func/mean": 0.0,
207
  "rewards/strict_format_reward_func/std": 0.0,
208
- "rewards/xmlcount_reward_func/mean": 0.25,
209
- "rewards/xmlcount_reward_func/std": 0.0,
210
  "step": 10
211
  },
212
  {
213
- "epoch": 10.0,
214
  "step": 10,
215
  "total_flos": 0.0,
216
- "train_loss": 0.0,
217
- "train_runtime": 1040.7425,
218
- "train_samples_per_second": 0.038,
219
- "train_steps_per_second": 0.01
220
  }
221
  ],
222
  "logging_steps": 2,
223
  "max_steps": 10,
224
- "num_input_tokens_seen": 5680,
225
- "num_train_epochs": 10,
226
  "save_steps": 10,
227
  "stateful_callbacks": {
228
  "TrainerControl": {
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.4878048780487805,
6
  "eval_steps": 500,
7
  "global_step": 10,
8
  "is_hyper_param_search": false,
 
16
  "clip_ratio/low_min": 0.0,
17
  "clip_ratio/region_mean": 0.0,
18
  "completions/clipped_ratio": 0.0,
19
+ "completions/max_length": 127.0,
20
+ "completions/max_terminated_length": 127.0,
21
+ "completions/mean_length": 59.25,
22
+ "completions/mean_terminated_length": 59.25,
23
+ "completions/min_length": 13.0,
24
+ "completions/min_terminated_length": 13.0,
25
+ "epoch": 0.0975609756097561,
26
+ "frac_reward_zero_std": 0.0,
27
+ "grad_norm": 25.766353607177734,
28
  "kl": 0.0,
29
  "learning_rate": 5e-07,
30
+ "loss": 0.0186,
31
+ "num_tokens": 1498.0,
32
+ "reward": 0.049393012188374996,
33
+ "reward_std": 0.048807840794324875,
34
  "rewards/concensus_correctness_reward_func/mean": 0.0,
35
  "rewards/concensus_correctness_reward_func/std": 0.0,
36
  "rewards/consensus_reward_func/mean": 0.0,
37
  "rewards/consensus_reward_func/std": 0.0,
38
  "rewards/cumulative_reward_2/mean": 0.0,
39
  "rewards/cumulative_reward_2/std": 0.0,
40
+ "rewards/final_correctness_reward_func/mean": 0.0,
41
+ "rewards/final_correctness_reward_func/std": 0.0,
42
+ "rewards/question_recreation_reward_func/mean": 0.02114301174879074,
43
+ "rewards/question_recreation_reward_func/std": 0.0122159318998456,
44
  "rewards/soft_format_reward_func/mean": 0.0,
45
  "rewards/soft_format_reward_func/std": 0.0,
46
  "rewards/strict_format_reward_func/mean": 0.0,
47
  "rewards/strict_format_reward_func/std": 0.0,
48
+ "rewards/xmlcount_reward_func/mean": 0.02824999950826168,
49
+ "rewards/xmlcount_reward_func/std": 0.05649999901652336,
50
  "step": 2
51
  },
52
  {
 
56
  "clip_ratio/low_min": 0.0,
57
  "clip_ratio/region_mean": 0.0,
58
  "completions/clipped_ratio": 0.0,
59
+ "completions/max_length": 132.0,
60
+ "completions/max_terminated_length": 132.0,
61
+ "completions/mean_length": 53.75,
62
+ "completions/mean_terminated_length": 53.75,
63
+ "completions/min_length": 24.5,
64
+ "completions/min_terminated_length": 24.5,
65
+ "epoch": 0.1951219512195122,
66
+ "frac_reward_zero_std": 0.25,
67
+ "grad_norm": 19.915145874023438,
68
+ "kl": 0.0023184213787317276,
69
  "learning_rate": 4.415111107797445e-07,
70
+ "loss": 0.0505,
71
+ "num_tokens": 2952.0,
72
+ "reward": 0.04935022257268429,
73
+ "reward_std": 0.05023687332868576,
74
  "rewards/concensus_correctness_reward_func/mean": 0.0,
75
  "rewards/concensus_correctness_reward_func/std": 0.0,
76
  "rewards/consensus_reward_func/mean": 0.0,
77
  "rewards/consensus_reward_func/std": 0.0,
78
  "rewards/cumulative_reward_2/mean": 0.0,
79
  "rewards/cumulative_reward_2/std": 0.0,
80
+ "rewards/final_correctness_reward_func/mean": 0.0,
81
+ "rewards/final_correctness_reward_func/std": 0.0,
82
+ "rewards/question_recreation_reward_func/mean": 0.018100222572684288,
83
+ "rewards/question_recreation_reward_func/std": 0.012556762900203466,
84
  "rewards/soft_format_reward_func/mean": 0.0,
85
  "rewards/soft_format_reward_func/std": 0.0,
86
  "rewards/strict_format_reward_func/mean": 0.0,
87
  "rewards/strict_format_reward_func/std": 0.0,
88
+ "rewards/xmlcount_reward_func/mean": 0.03125,
89
+ "rewards/xmlcount_reward_func/std": 0.0625,
90
  "step": 4
91
  },
92
  {
 
96
  "clip_ratio/low_min": 0.0,
97
  "clip_ratio/region_mean": 0.0,
98
  "completions/clipped_ratio": 0.0,
99
+ "completions/max_length": 67.5,
100
+ "completions/max_terminated_length": 67.5,
101
+ "completions/mean_length": 28.25,
102
+ "completions/mean_terminated_length": 28.25,
103
+ "completions/min_length": 5.5,
104
+ "completions/min_terminated_length": 5.5,
105
+ "epoch": 0.2926829268292683,
106
+ "frac_reward_zero_std": 0.0,
107
+ "grad_norm": 97.34249877929688,
108
+ "kl": 0.01801438198890537,
109
  "learning_rate": 2.934120444167326e-07,
110
+ "loss": -0.16,
111
+ "num_tokens": 4202.0,
112
+ "reward": 0.114079300314188,
113
+ "reward_std": 0.08150303550064564,
114
  "rewards/concensus_correctness_reward_func/mean": 0.0,
115
  "rewards/concensus_correctness_reward_func/std": 0.0,
116
  "rewards/consensus_reward_func/mean": 0.0,
117
  "rewards/consensus_reward_func/std": 0.0,
118
  "rewards/cumulative_reward_2/mean": 0.0,
119
  "rewards/cumulative_reward_2/std": 0.0,
120
+ "rewards/final_correctness_reward_func/mean": 0.0,
121
+ "rewards/final_correctness_reward_func/std": 0.0,
122
+ "rewards/question_recreation_reward_func/mean": 0.08282929984852672,
123
+ "rewards/question_recreation_reward_func/std": 0.03306010598316789,
124
  "rewards/soft_format_reward_func/mean": 0.0,
125
  "rewards/soft_format_reward_func/std": 0.0,
126
  "rewards/strict_format_reward_func/mean": 0.0,
127
  "rewards/strict_format_reward_func/std": 0.0,
128
+ "rewards/xmlcount_reward_func/mean": 0.03125,
129
+ "rewards/xmlcount_reward_func/std": 0.0625,
130
  "step": 6
131
  },
132
  {
 
136
  "clip_ratio/low_min": 0.0,
137
  "clip_ratio/region_mean": 0.0,
138
  "completions/clipped_ratio": 0.0,
139
+ "completions/max_length": 75.5,
140
+ "completions/max_terminated_length": 75.5,
141
+ "completions/mean_length": 36.875,
142
+ "completions/mean_terminated_length": 36.875,
143
+ "completions/min_length": 16.0,
144
+ "completions/min_terminated_length": 16.0,
145
+ "epoch": 0.3902439024390244,
146
+ "frac_reward_zero_std": 0.25,
147
+ "grad_norm": 50.915489196777344,
148
+ "kl": 0.019150954321958125,
149
  "learning_rate": 1.2500000000000005e-07,
150
+ "loss": -0.0363,
151
+ "num_tokens": 5521.0,
152
+ "reward": 0.13781297951936722,
153
+ "reward_std": 0.03136043483391404,
154
  "rewards/concensus_correctness_reward_func/mean": 0.0,
155
  "rewards/concensus_correctness_reward_func/std": 0.0,
156
  "rewards/consensus_reward_func/mean": 0.0,
157
  "rewards/consensus_reward_func/std": 0.0,
158
  "rewards/cumulative_reward_2/mean": 0.0,
159
  "rewards/cumulative_reward_2/std": 0.0,
160
+ "rewards/final_correctness_reward_func/mean": 0.0,
161
+ "rewards/final_correctness_reward_func/std": 0.0,
162
+ "rewards/question_recreation_reward_func/mean": 0.028437979985028505,
163
+ "rewards/question_recreation_reward_func/std": 0.010751228081062436,
164
  "rewards/soft_format_reward_func/mean": 0.0,
165
  "rewards/soft_format_reward_func/std": 0.0,
166
  "rewards/strict_format_reward_func/mean": 0.0,
167
  "rewards/strict_format_reward_func/std": 0.0,
168
+ "rewards/xmlcount_reward_func/mean": 0.109375,
169
+ "rewards/xmlcount_reward_func/std": 0.13200797885656357,
170
  "step": 8
171
  },
172
  {
 
176
  "clip_ratio/low_min": 0.0,
177
  "clip_ratio/region_mean": 0.0,
178
  "completions/clipped_ratio": 0.0,
179
+ "completions/max_length": 120.5,
180
+ "completions/max_terminated_length": 120.5,
181
+ "completions/mean_length": 44.25,
182
+ "completions/mean_terminated_length": 44.25,
183
+ "completions/min_length": 12.5,
184
+ "completions/min_terminated_length": 12.5,
185
+ "epoch": 0.4878048780487805,
186
+ "frac_reward_zero_std": 0.25,
187
+ "grad_norm": 31.54133415222168,
188
+ "kl": 0.02116560866124928,
189
  "learning_rate": 1.507684480352292e-08,
190
+ "loss": 0.0382,
191
+ "num_tokens": 6899.0,
192
+ "reward": 0.08553153276443481,
193
+ "reward_std": 0.019916290184482932,
194
  "rewards/concensus_correctness_reward_func/mean": 0.0,
195
  "rewards/concensus_correctness_reward_func/std": 0.0,
196
  "rewards/consensus_reward_func/mean": 0.0,
197
  "rewards/consensus_reward_func/std": 0.0,
198
  "rewards/cumulative_reward_2/mean": 0.0,
199
  "rewards/cumulative_reward_2/std": 0.0,
200
+ "rewards/final_correctness_reward_func/mean": 0.0,
201
+ "rewards/final_correctness_reward_func/std": 0.0,
202
+ "rewards/question_recreation_reward_func/mean": 0.013406533282250166,
203
+ "rewards/question_recreation_reward_func/std": 0.013068773550912738,
204
  "rewards/soft_format_reward_func/mean": 0.0,
205
  "rewards/soft_format_reward_func/std": 0.0,
206
  "rewards/strict_format_reward_func/mean": 0.0,
207
  "rewards/strict_format_reward_func/std": 0.0,
208
+ "rewards/xmlcount_reward_func/mean": 0.07212499901652336,
209
+ "rewards/xmlcount_reward_func/std": 0.09649057686328888,
210
  "step": 10
211
  },
212
  {
213
+ "epoch": 0.4878048780487805,
214
  "step": 10,
215
  "total_flos": 0.0,
216
+ "train_loss": -0.01780639439821243,
217
+ "train_runtime": 1859.9628,
218
+ "train_samples_per_second": 0.022,
219
+ "train_steps_per_second": 0.005
220
  }
221
  ],
222
  "logging_steps": 2,
223
  "max_steps": 10,
224
+ "num_input_tokens_seen": 6899,
225
+ "num_train_epochs": 1,
226
  "save_steps": 10,
227
  "stateful_callbacks": {
228
  "TrainerControl": {