murakamia commited on
Commit
37d57eb
·
verified ·
1 Parent(s): f618c08

End of training

Browse files
Files changed (4) hide show
  1. all_results.json +4 -4
  2. model.safetensors +1 -1
  3. train_results.json +4 -4
  4. trainer_state.json +110 -110
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 0.0,
3
- "train_loss": 0.00039611602551303804,
4
- "train_runtime": 242.2421,
5
  "train_samples": 28,
6
- "train_samples_per_second": 1.321,
7
- "train_steps_per_second": 0.083
8
  }
 
1
  {
2
  "total_flos": 0.0,
3
+ "train_loss": 8.675511101926531,
4
+ "train_runtime": 192.5653,
5
  "train_samples": 28,
6
+ "train_samples_per_second": 1.662,
7
+ "train_steps_per_second": 0.104
8
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8e72770482bb761fd6372292fc12a68c23a73d93e4f464073bf0ec3bad48edb7
3
  size 1976163472
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6fb35785199e6d78014b99fb5d1cacb5f3b4c9b77b521bf7559aeb5b9de82413
3
  size 1976163472
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 0.0,
3
- "train_loss": 0.00039611602551303804,
4
- "train_runtime": 242.2421,
5
  "train_samples": 28,
6
- "train_samples_per_second": 1.321,
7
- "train_steps_per_second": 0.083
8
  }
 
1
  {
2
  "total_flos": 0.0,
3
+ "train_loss": 8.675511101926531,
4
+ "train_runtime": 192.5653,
5
  "train_samples": 28,
6
+ "train_samples_per_second": 1.662,
7
+ "train_steps_per_second": 0.104
8
  }
trainer_state.json CHANGED
@@ -10,203 +10,203 @@
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
- "completion_length": 320.90625,
14
  "epoch": 0.5714285714285714,
15
- "grad_norm": 14.3243408203125,
16
  "kl": 0.0,
17
  "learning_rate": 5e-07,
18
  "loss": 0.0,
19
- "reward": 1.8238091692328453,
20
- "reward_std": 1.0591860833810642,
21
- "rewards/concensus_correctness_reward_func": 0.25181249529123306,
22
- "rewards/consensus_reward_func": 0.375,
23
  "rewards/cumulative_reward_2": 0.0,
24
- "rewards/final_correctness_reward_func": 0.0625,
25
- "rewards/question_recreation_reward_func": 0.5330279082991183,
26
  "rewards/soft_format_reward_func": 0.0,
27
  "rewards/strict_format_reward_func": 0.0,
28
- "rewards/xmlcount_reward_func": 0.6014687430579215,
29
  "step": 2
30
  },
31
  {
32
- "completion_length": 394.8333333333333,
33
  "epoch": 1.0,
34
- "grad_norm": 6.504026889801025,
35
- "kl": 0.00567529887969916,
36
  "learning_rate": 4.864543104251586e-07,
37
- "loss": 0.0,
38
- "reward": 2.645399361848831,
39
- "reward_std": 1.4961872142351542,
40
- "rewards/concensus_correctness_reward_func": 0.5172500063975652,
41
- "rewards/consensus_reward_func": 0.5,
42
  "rewards/cumulative_reward_2": 0.0,
43
  "rewards/final_correctness_reward_func": 0.08333333333333333,
44
- "rewards/question_recreation_reward_func": 0.740566020210584,
45
  "rewards/soft_format_reward_func": 0.0,
46
  "rewards/strict_format_reward_func": 0.0,
47
- "rewards/xmlcount_reward_func": 0.8042500068744024,
48
  "step": 4
49
  },
50
  {
51
- "completion_length": 264.84375,
52
  "epoch": 1.5714285714285714,
53
- "grad_norm": 13.624690055847168,
54
- "kl": 0.09184227080550045,
55
  "learning_rate": 4.472851273490984e-07,
56
  "loss": 0.0001,
57
- "reward": 2.3658637814223766,
58
- "reward_std": 0.9425615618820302,
59
- "rewards/concensus_correctness_reward_func": 0.4235000014305115,
60
- "rewards/consensus_reward_func": 0.375,
61
  "rewards/cumulative_reward_2": 0.0,
62
- "rewards/final_correctness_reward_func": 0.0625,
63
- "rewards/question_recreation_reward_func": 0.6558637763373554,
64
  "rewards/soft_format_reward_func": 0.0,
65
  "rewards/strict_format_reward_func": 0.0,
66
- "rewards/xmlcount_reward_func": 0.8489999696612358,
67
  "step": 6
68
  },
69
  {
70
- "completion_length": 363.25,
71
  "epoch": 2.0,
72
- "grad_norm": 11.709728240966797,
73
- "kl": 0.03270413619854177,
74
  "learning_rate": 3.867370395306068e-07,
75
- "loss": 0.0,
76
- "reward": 3.828521261612574,
77
- "reward_std": 2.0679361606016755,
78
- "rewards/concensus_correctness_reward_func": 1.4121666252613068,
79
- "rewards/consensus_reward_func": 0.75,
80
  "rewards/cumulative_reward_2": 0.0,
81
- "rewards/final_correctness_reward_func": 0.25,
82
- "rewards/question_recreation_reward_func": 0.6656045454243819,
83
  "rewards/soft_format_reward_func": 0.0,
84
- "rewards/strict_format_reward_func": 0.020833333333333332,
85
- "rewards/xmlcount_reward_func": 0.7299166768789291,
86
  "step": 8
87
  },
88
  {
89
- "completion_length": 303.125,
90
  "epoch": 2.571428571428571,
91
- "grad_norm": 12.579998970031738,
92
- "kl": 0.07898051122901961,
93
  "learning_rate": 3.1137137178519977e-07,
94
- "loss": 0.0001,
95
- "reward": 1.8908442594110966,
96
- "reward_std": 1.1686942288652062,
97
- "rewards/concensus_correctness_reward_func": 0.28337499499320984,
98
- "rewards/consensus_reward_func": 0.3125,
99
  "rewards/cumulative_reward_2": 0.0,
100
- "rewards/final_correctness_reward_func": 0.0,
101
- "rewards/question_recreation_reward_func": 0.527281790971756,
102
  "rewards/soft_format_reward_func": 0.0,
103
- "rewards/strict_format_reward_func": 0.0,
104
- "rewards/xmlcount_reward_func": 0.7676874920725822,
105
  "step": 10
106
  },
107
  {
108
- "completion_length": 304.625,
109
  "epoch": 3.0,
110
- "grad_norm": 6.953789234161377,
111
- "kl": 0.07293941577275594,
112
  "learning_rate": 2.2935516363191693e-07,
113
  "loss": 0.0001,
114
- "reward": 3.5938471953074136,
115
- "reward_std": 2.2706022734443345,
116
- "rewards/concensus_correctness_reward_func": 0.7098333388566971,
117
- "rewards/consensus_reward_func": 0.75,
118
  "rewards/cumulative_reward_2": 0.0,
119
- "rewards/final_correctness_reward_func": 0.4166666666666667,
120
- "rewards/question_recreation_reward_func": 0.8268054773410162,
121
  "rewards/soft_format_reward_func": 0.0,
122
  "rewards/strict_format_reward_func": 0.0,
123
- "rewards/xmlcount_reward_func": 0.890541652838389,
124
  "step": 12
125
  },
126
  {
127
- "completion_length": 256.96875,
128
  "epoch": 3.571428571428571,
129
- "grad_norm": 16.279869079589844,
130
- "kl": 0.7254527114564553,
131
  "learning_rate": 1.4957614383675767e-07,
132
- "loss": 0.0007,
133
- "reward": 3.9172775223851204,
134
- "reward_std": 1.1482481649145484,
135
- "rewards/concensus_correctness_reward_func": 0.9585625063627958,
136
- "rewards/consensus_reward_func": 0.8125,
137
  "rewards/cumulative_reward_2": 0.0,
138
- "rewards/final_correctness_reward_func": 0.375,
139
- "rewards/question_recreation_reward_func": 0.7574650766327977,
140
  "rewards/soft_format_reward_func": 0.0,
141
  "rewards/strict_format_reward_func": 0.0,
142
- "rewards/xmlcount_reward_func": 1.0137499924749136,
143
  "step": 14
144
  },
145
  {
146
- "completion_length": 308.25,
147
  "epoch": 4.0,
148
- "grad_norm": 11.026092529296875,
149
- "kl": 0.3210177328437567,
150
  "learning_rate": 8.067960709356478e-08,
151
- "loss": 0.0002,
152
- "reward": 2.663438856601715,
153
- "reward_std": 1.4866043164705236,
154
- "rewards/concensus_correctness_reward_func": 0.5567499945561091,
155
- "rewards/consensus_reward_func": 0.5833333333333334,
156
  "rewards/cumulative_reward_2": 0.0,
157
- "rewards/final_correctness_reward_func": 0.0,
158
- "rewards/question_recreation_reward_func": 0.7065221890807152,
159
  "rewards/soft_format_reward_func": 0.0,
160
  "rewards/strict_format_reward_func": 0.0,
161
- "rewards/xmlcount_reward_func": 0.8168333073457082,
162
  "step": 16
163
  },
164
  {
165
- "completion_length": 268.71875,
166
  "epoch": 4.571428571428571,
167
- "grad_norm": 18.54529571533203,
168
- "kl": 2.3130286334780976,
169
  "learning_rate": 3.013156219837776e-08,
170
- "loss": 0.0023,
171
- "reward": 3.231190398335457,
172
- "reward_std": 0.8707997768069617,
173
- "rewards/concensus_correctness_reward_func": 0.48849999718368053,
174
- "rewards/consensus_reward_func": 0.625,
175
  "rewards/cumulative_reward_2": 0.0,
176
- "rewards/final_correctness_reward_func": 0.3125,
177
- "rewards/question_recreation_reward_func": 0.7732217563316226,
178
  "rewards/soft_format_reward_func": 0.0,
179
  "rewards/strict_format_reward_func": 0.0,
180
- "rewards/xmlcount_reward_func": 1.031968742609024,
181
  "step": 18
182
  },
183
  {
184
- "completion_length": 267.5,
185
  "epoch": 5.0,
186
- "grad_norm": 11.74589729309082,
187
- "kl": 0.5716485911980271,
188
  "learning_rate": 3.4096741493194193e-09,
189
- "loss": 0.0004,
190
- "reward": 2.583486075202624,
191
- "reward_std": 0.9447366874665022,
192
- "rewards/concensus_correctness_reward_func": 0.415583332379659,
193
  "rewards/consensus_reward_func": 0.4166666666666667,
194
  "rewards/cumulative_reward_2": 0.0,
195
- "rewards/final_correctness_reward_func": 0.08333333333333333,
196
- "rewards/question_recreation_reward_func": 0.654319416731596,
197
  "rewards/soft_format_reward_func": 0.0,
198
  "rewards/strict_format_reward_func": 0.0,
199
- "rewards/xmlcount_reward_func": 1.013583317399025,
200
  "step": 20
201
  },
202
  {
203
  "epoch": 5.0,
204
  "step": 20,
205
  "total_flos": 0.0,
206
- "train_loss": 0.00039611602551303804,
207
- "train_runtime": 242.2421,
208
- "train_samples_per_second": 1.321,
209
- "train_steps_per_second": 0.083
210
  }
211
  ],
212
  "logging_steps": 2,
 
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
+ "completion_length": 237.09375,
14
  "epoch": 0.5714285714285714,
15
+ "grad_norm": 23.86282730102539,
16
  "kl": 0.0,
17
  "learning_rate": 5e-07,
18
  "loss": 0.0,
19
+ "reward": 2.320491649210453,
20
+ "reward_std": 1.035187211818993,
21
+ "rewards/concensus_correctness_reward_func": 0.31562499701976776,
22
+ "rewards/consensus_reward_func": 0.4375,
23
  "rewards/cumulative_reward_2": 0.0,
24
+ "rewards/final_correctness_reward_func": 0.125,
25
+ "rewards/question_recreation_reward_func": 0.6428666170686483,
26
  "rewards/soft_format_reward_func": 0.0,
27
  "rewards/strict_format_reward_func": 0.0,
28
+ "rewards/xmlcount_reward_func": 0.7994999922811985,
29
  "step": 2
30
  },
31
  {
32
+ "completion_length": 202.75,
33
  "epoch": 1.0,
34
+ "grad_norm": 20.716642379760742,
35
+ "kl": 0.3580615110695362,
36
  "learning_rate": 4.864543104251586e-07,
37
+ "loss": 0.0003,
38
+ "reward": 2.4445405304431915,
39
+ "reward_std": 0.7943247662236294,
40
+ "rewards/concensus_correctness_reward_func": 0.31799999872843426,
41
+ "rewards/consensus_reward_func": 0.25,
42
  "rewards/cumulative_reward_2": 0.0,
43
  "rewards/final_correctness_reward_func": 0.08333333333333333,
44
+ "rewards/question_recreation_reward_func": 0.7158738623062769,
45
  "rewards/soft_format_reward_func": 0.0,
46
  "rewards/strict_format_reward_func": 0.0,
47
+ "rewards/xmlcount_reward_func": 1.0773333460092545,
48
  "step": 4
49
  },
50
  {
51
+ "completion_length": 236.0,
52
  "epoch": 1.5714285714285714,
53
+ "grad_norm": 17.435176849365234,
54
+ "kl": 0.07964501332025975,
55
  "learning_rate": 4.472851273490984e-07,
56
  "loss": 0.0001,
57
+ "reward": 3.0734478905797005,
58
+ "reward_std": 1.2019765962613747,
59
+ "rewards/concensus_correctness_reward_func": 0.818124994635582,
60
+ "rewards/consensus_reward_func": 0.625,
61
  "rewards/cumulative_reward_2": 0.0,
62
+ "rewards/final_correctness_reward_func": 0.0,
63
+ "rewards/question_recreation_reward_func": 0.6496978905051947,
64
  "rewards/soft_format_reward_func": 0.0,
65
  "rewards/strict_format_reward_func": 0.0,
66
+ "rewards/xmlcount_reward_func": 0.9806249961256981,
67
  "step": 6
68
  },
69
  {
70
+ "completion_length": 218.04166666666666,
71
  "epoch": 2.0,
72
+ "grad_norm": 10.406091690063477,
73
+ "kl": 81480.78907666805,
74
  "learning_rate": 3.867370395306068e-07,
75
+ "loss": 61.1106,
76
+ "reward": 3.46412992477417,
77
+ "reward_std": 2.2507151973744235,
78
+ "rewards/concensus_correctness_reward_func": 1.1319166670242946,
79
+ "rewards/consensus_reward_func": 0.4166666666666667,
80
  "rewards/cumulative_reward_2": 0.0,
81
+ "rewards/final_correctness_reward_func": 0.08333333333333333,
82
+ "rewards/question_recreation_reward_func": 0.746754934390386,
83
  "rewards/soft_format_reward_func": 0.0,
84
+ "rewards/strict_format_reward_func": 0.041666666666666664,
85
+ "rewards/xmlcount_reward_func": 1.0437916765610378,
86
  "step": 8
87
  },
88
  {
89
+ "completion_length": 222.4375,
90
  "epoch": 2.571428571428571,
91
+ "grad_norm": 14.132076263427734,
92
+ "kl": 0.5107612958818208,
93
  "learning_rate": 3.1137137178519977e-07,
94
+ "loss": 0.0005,
95
+ "reward": 3.7268150821328163,
96
+ "reward_std": 1.5598002884944435,
97
+ "rewards/concensus_correctness_reward_func": 0.9949999861419201,
98
+ "rewards/consensus_reward_func": 0.8125,
99
  "rewards/cumulative_reward_2": 0.0,
100
+ "rewards/final_correctness_reward_func": 0.1875,
101
+ "rewards/question_recreation_reward_func": 0.6768150591524318,
102
  "rewards/soft_format_reward_func": 0.0,
103
+ "rewards/strict_format_reward_func": 0.015625,
104
+ "rewards/xmlcount_reward_func": 1.0393749959766865,
105
  "step": 10
106
  },
107
  {
108
+ "completion_length": 205.75,
109
  "epoch": 3.0,
110
+ "grad_norm": 8.287981033325195,
111
+ "kl": 0.11747636886623998,
112
  "learning_rate": 2.2935516363191693e-07,
113
  "loss": 0.0001,
114
+ "reward": 4.006574511528015,
115
+ "reward_std": 1.4391742528726656,
116
+ "rewards/concensus_correctness_reward_func": 0.9271666631102562,
117
+ "rewards/consensus_reward_func": 1.0833333333333333,
118
  "rewards/cumulative_reward_2": 0.0,
119
+ "rewards/final_correctness_reward_func": 0.0,
120
+ "rewards/question_recreation_reward_func": 0.8568661361932755,
121
  "rewards/soft_format_reward_func": 0.0,
122
  "rewards/strict_format_reward_func": 0.0,
123
+ "rewards/xmlcount_reward_func": 1.1392083217700322,
124
  "step": 12
125
  },
126
  {
127
+ "completion_length": 273.4375,
128
  "epoch": 3.571428571428571,
129
+ "grad_norm": 12.828157424926758,
130
+ "kl": 0.5077391383820213,
131
  "learning_rate": 1.4957614383675767e-07,
132
+ "loss": 0.0005,
133
+ "reward": 3.0370506569743156,
134
+ "reward_std": 1.9715431984513998,
135
+ "rewards/concensus_correctness_reward_func": 0.613062497228384,
136
+ "rewards/consensus_reward_func": 0.6875,
137
  "rewards/cumulative_reward_2": 0.0,
138
+ "rewards/final_correctness_reward_func": 0.1875,
139
+ "rewards/question_recreation_reward_func": 0.6411756332963705,
140
  "rewards/soft_format_reward_func": 0.0,
141
  "rewards/strict_format_reward_func": 0.0,
142
+ "rewards/xmlcount_reward_func": 0.9078125022351742,
143
  "step": 14
144
  },
145
  {
146
+ "completion_length": 249.08333333333334,
147
  "epoch": 4.0,
148
+ "grad_norm": 12.552739143371582,
149
+ "kl": 34149.09689274756,
150
  "learning_rate": 8.067960709356478e-08,
151
+ "loss": 25.6118,
152
+ "reward": 3.7040178924798965,
153
+ "reward_std": 1.3390946853905916,
154
+ "rewards/concensus_correctness_reward_func": 0.9702499856551489,
155
+ "rewards/consensus_reward_func": 0.8333333333333334,
156
  "rewards/cumulative_reward_2": 0.0,
157
+ "rewards/final_correctness_reward_func": 0.25,
158
+ "rewards/question_recreation_reward_func": 0.6871429421007633,
159
  "rewards/soft_format_reward_func": 0.0,
160
  "rewards/strict_format_reward_func": 0.0,
161
+ "rewards/xmlcount_reward_func": 0.9632916674017906,
162
  "step": 16
163
  },
164
  {
165
+ "completion_length": 212.78125,
166
  "epoch": 4.571428571428571,
167
+ "grad_norm": 16.37578773498535,
168
+ "kl": 19.238060696865432,
169
  "learning_rate": 3.013156219837776e-08,
170
+ "loss": 0.0192,
171
+ "reward": 3.540576569736004,
172
+ "reward_std": 1.7284664756152779,
173
+ "rewards/concensus_correctness_reward_func": 0.9062499925494194,
174
+ "rewards/consensus_reward_func": 0.8125,
175
  "rewards/cumulative_reward_2": 0.0,
176
+ "rewards/final_correctness_reward_func": 0.1875,
177
+ "rewards/question_recreation_reward_func": 0.6683264966122806,
178
  "rewards/soft_format_reward_func": 0.0,
179
  "rewards/strict_format_reward_func": 0.0,
180
+ "rewards/xmlcount_reward_func": 0.96599998511374,
181
  "step": 18
182
  },
183
  {
184
+ "completion_length": 242.83333333333334,
185
  "epoch": 5.0,
186
+ "grad_norm": 244.17030334472656,
187
+ "kl": 15.989224990286553,
188
  "learning_rate": 3.4096741493194193e-09,
189
+ "loss": 0.012,
190
+ "reward": 3.2668827176094055,
191
+ "reward_std": 1.63679713383317,
192
+ "rewards/concensus_correctness_reward_func": 0.7084999978542328,
193
  "rewards/consensus_reward_func": 0.4166666666666667,
194
  "rewards/cumulative_reward_2": 0.0,
195
+ "rewards/final_correctness_reward_func": 0.25,
196
+ "rewards/question_recreation_reward_func": 0.8270076736807823,
197
  "rewards/soft_format_reward_func": 0.0,
198
  "rewards/strict_format_reward_func": 0.0,
199
+ "rewards/xmlcount_reward_func": 1.0647083222866058,
200
  "step": 20
201
  },
202
  {
203
  "epoch": 5.0,
204
  "step": 20,
205
  "total_flos": 0.0,
206
+ "train_loss": 8.675511101926531,
207
+ "train_runtime": 192.5653,
208
+ "train_samples_per_second": 1.662,
209
+ "train_steps_per_second": 0.104
210
  }
211
  ],
212
  "logging_steps": 2,