Yusra2677 committed on
Commit
d95ea3b
·
verified ·
1 Parent(s): 50d1753

End of training

Browse files
Files changed (5) hide show
  1. README.md +4 -1
  2. all_results.json +12 -0
  3. eval_results.json +7 -0
  4. train_results.json +8 -0
  5. trainer_state.json +365 -0
README.md CHANGED
@@ -4,6 +4,7 @@ license: llama2
4
  base_model: meta-llama/CodeLlama-7b-Instruct-hf
5
  tags:
6
  - llama-factory
 
7
  - generated_from_trainer
8
  model-index:
9
  - name: text-to-odrl-codellama.7b-v0
@@ -15,7 +16,9 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  # text-to-odrl-codellama.7b-v0
17
 
18
- This model is a fine-tuned version of [meta-llama/CodeLlama-7b-Instruct-hf](https://huggingface.co/meta-llama/CodeLlama-7b-Instruct-hf) on an unknown dataset.
 
 
19
 
20
  ## Model description
21
 
 
4
  base_model: meta-llama/CodeLlama-7b-Instruct-hf
5
  tags:
6
  - llama-factory
7
+ - lora
8
  - generated_from_trainer
9
  model-index:
10
  - name: text-to-odrl-codellama.7b-v0
 
16
 
17
  # text-to-odrl-codellama.7b-v0
18
 
19
+ This model is a fine-tuned version of [meta-llama/CodeLlama-7b-Instruct-hf](https://huggingface.co/meta-llama/CodeLlama-7b-Instruct-hf) on the text_to_odrl_train dataset.
20
+ It achieves the following results on the evaluation set:
21
+ - Loss: 0.0923
22
 
23
  ## Model description
24
 
all_results.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.0,
3
+ "eval_loss": 0.09225524216890335,
4
+ "eval_runtime": 945.9721,
5
+ "eval_samples_per_second": 3.923,
6
+ "eval_steps_per_second": 1.962,
7
+ "total_flos": 2.4087255599087616e+17,
8
+ "train_loss": 0.11968068110531774,
9
+ "train_runtime": 7304.8453,
10
+ "train_samples_per_second": 1.016,
11
+ "train_steps_per_second": 0.064
12
+ }
eval_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.0,
3
+ "eval_loss": 0.09225524216890335,
4
+ "eval_runtime": 945.9721,
5
+ "eval_samples_per_second": 3.923,
6
+ "eval_steps_per_second": 1.962
7
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.0,
3
+ "total_flos": 2.4087255599087616e+17,
4
+ "train_loss": 0.11968068110531774,
5
+ "train_runtime": 7304.8453,
6
+ "train_samples_per_second": 1.016,
7
+ "train_steps_per_second": 0.064
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,365 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 2.0,
6
+ "eval_steps": 500,
7
+ "global_step": 464,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.04310344827586207,
14
+ "grad_norm": 0.22970597445964813,
15
+ "learning_rate": 3.2142857142857144e-05,
16
+ "loss": 0.5477,
17
+ "step": 10
18
+ },
19
+ {
20
+ "epoch": 0.08620689655172414,
21
+ "grad_norm": 0.24922381341457367,
22
+ "learning_rate": 4.99847706754774e-05,
23
+ "loss": 0.4192,
24
+ "step": 20
25
+ },
26
+ {
27
+ "epoch": 0.12931034482758622,
28
+ "grad_norm": 0.1861911565065384,
29
+ "learning_rate": 4.9863047384206835e-05,
30
+ "loss": 0.2814,
31
+ "step": 30
32
+ },
33
+ {
34
+ "epoch": 0.1724137931034483,
35
+ "grad_norm": 0.16707484424114227,
36
+ "learning_rate": 4.962019382530521e-05,
37
+ "loss": 0.2082,
38
+ "step": 40
39
+ },
40
+ {
41
+ "epoch": 0.21551724137931033,
42
+ "grad_norm": 0.17490606009960175,
43
+ "learning_rate": 4.925739315689991e-05,
44
+ "loss": 0.1599,
45
+ "step": 50
46
+ },
47
+ {
48
+ "epoch": 0.25862068965517243,
49
+ "grad_norm": 0.16346780955791473,
50
+ "learning_rate": 4.877641290737884e-05,
51
+ "loss": 0.144,
52
+ "step": 60
53
+ },
54
+ {
55
+ "epoch": 0.3017241379310345,
56
+ "grad_norm": 0.16197219491004944,
57
+ "learning_rate": 4.817959636416969e-05,
58
+ "loss": 0.123,
59
+ "step": 70
60
+ },
61
+ {
62
+ "epoch": 0.3448275862068966,
63
+ "grad_norm": 0.1676415354013443,
64
+ "learning_rate": 4.7469851157479177e-05,
65
+ "loss": 0.1168,
66
+ "step": 80
67
+ },
68
+ {
69
+ "epoch": 0.3879310344827586,
70
+ "grad_norm": 0.14542680978775024,
71
+ "learning_rate": 4.665063509461097e-05,
72
+ "loss": 0.1112,
73
+ "step": 90
74
+ },
75
+ {
76
+ "epoch": 0.43103448275862066,
77
+ "grad_norm": 0.1882169097661972,
78
+ "learning_rate": 4.572593931387604e-05,
79
+ "loss": 0.1005,
80
+ "step": 100
81
+ },
82
+ {
83
+ "epoch": 0.47413793103448276,
84
+ "grad_norm": 0.14488768577575684,
85
+ "learning_rate": 4.4700268840168045e-05,
86
+ "loss": 0.102,
87
+ "step": 110
88
+ },
89
+ {
90
+ "epoch": 0.5172413793103449,
91
+ "grad_norm": 0.15230809152126312,
92
+ "learning_rate": 4.357862063693486e-05,
93
+ "loss": 0.103,
94
+ "step": 120
95
+ },
96
+ {
97
+ "epoch": 0.5603448275862069,
98
+ "grad_norm": 0.13396801054477692,
99
+ "learning_rate": 4.2366459261474933e-05,
100
+ "loss": 0.1035,
101
+ "step": 130
102
+ },
103
+ {
104
+ "epoch": 0.603448275862069,
105
+ "grad_norm": 0.1350702941417694,
106
+ "learning_rate": 4.1069690242163484e-05,
107
+ "loss": 0.0995,
108
+ "step": 140
109
+ },
110
+ {
111
+ "epoch": 0.646551724137931,
112
+ "grad_norm": 0.1560034453868866,
113
+ "learning_rate": 3.969463130731183e-05,
114
+ "loss": 0.0979,
115
+ "step": 150
116
+ },
117
+ {
118
+ "epoch": 0.6896551724137931,
119
+ "grad_norm": 0.13901282846927643,
120
+ "learning_rate": 3.824798160583012e-05,
121
+ "loss": 0.0947,
122
+ "step": 160
123
+ },
124
+ {
125
+ "epoch": 0.7327586206896551,
126
+ "grad_norm": 0.18362534046173096,
127
+ "learning_rate": 3.673678906964727e-05,
128
+ "loss": 0.0969,
129
+ "step": 170
130
+ },
131
+ {
132
+ "epoch": 0.7758620689655172,
133
+ "grad_norm": 0.17652247846126556,
134
+ "learning_rate": 3.516841607689501e-05,
135
+ "loss": 0.0905,
136
+ "step": 180
137
+ },
138
+ {
139
+ "epoch": 0.8189655172413793,
140
+ "grad_norm": 0.1358911544084549,
141
+ "learning_rate": 3.355050358314172e-05,
142
+ "loss": 0.0925,
143
+ "step": 190
144
+ },
145
+ {
146
+ "epoch": 0.8620689655172413,
147
+ "grad_norm": 0.1052212044596672,
148
+ "learning_rate": 3.1890933895424976e-05,
149
+ "loss": 0.0953,
150
+ "step": 200
151
+ },
152
+ {
153
+ "epoch": 0.9051724137931034,
154
+ "grad_norm": 0.13334128260612488,
155
+ "learning_rate": 3.0197792270443982e-05,
156
+ "loss": 0.0886,
157
+ "step": 210
158
+ },
159
+ {
160
+ "epoch": 0.9482758620689655,
161
+ "grad_norm": 0.12180697172880173,
162
+ "learning_rate": 2.8479327524001636e-05,
163
+ "loss": 0.0914,
164
+ "step": 220
165
+ },
166
+ {
167
+ "epoch": 0.9913793103448276,
168
+ "grad_norm": 0.1343410611152649,
169
+ "learning_rate": 2.674391184360313e-05,
170
+ "loss": 0.0887,
171
+ "step": 230
172
+ },
173
+ {
174
+ "epoch": 1.0344827586206897,
175
+ "grad_norm": 0.13129718601703644,
176
+ "learning_rate": 2.5e-05,
177
+ "loss": 0.0993,
178
+ "step": 240
179
+ },
180
+ {
181
+ "epoch": 1.0775862068965518,
182
+ "grad_norm": 0.12985403835773468,
183
+ "learning_rate": 2.3256088156396868e-05,
184
+ "loss": 0.0925,
185
+ "step": 250
186
+ },
187
+ {
188
+ "epoch": 1.1206896551724137,
189
+ "grad_norm": 0.10140767693519592,
190
+ "learning_rate": 2.1520672475998373e-05,
191
+ "loss": 0.086,
192
+ "step": 260
193
+ },
194
+ {
195
+ "epoch": 1.1637931034482758,
196
+ "grad_norm": 0.10552043467760086,
197
+ "learning_rate": 1.980220772955602e-05,
198
+ "loss": 0.0885,
199
+ "step": 270
200
+ },
201
+ {
202
+ "epoch": 1.206896551724138,
203
+ "grad_norm": 0.17615962028503418,
204
+ "learning_rate": 1.8109066104575023e-05,
205
+ "loss": 0.0935,
206
+ "step": 280
207
+ },
208
+ {
209
+ "epoch": 1.25,
210
+ "grad_norm": 0.16146473586559296,
211
+ "learning_rate": 1.6449496416858284e-05,
212
+ "loss": 0.089,
213
+ "step": 290
214
+ },
215
+ {
216
+ "epoch": 1.293103448275862,
217
+ "grad_norm": 0.11695656925439835,
218
+ "learning_rate": 1.4831583923104999e-05,
219
+ "loss": 0.0934,
220
+ "step": 300
221
+ },
222
+ {
223
+ "epoch": 1.3362068965517242,
224
+ "grad_norm": 0.13488854467868805,
225
+ "learning_rate": 1.3263210930352737e-05,
226
+ "loss": 0.0902,
227
+ "step": 310
228
+ },
229
+ {
230
+ "epoch": 1.3793103448275863,
231
+ "grad_norm": 0.14104993641376495,
232
+ "learning_rate": 1.175201839416988e-05,
233
+ "loss": 0.0915,
234
+ "step": 320
235
+ },
236
+ {
237
+ "epoch": 1.4224137931034484,
238
+ "grad_norm": 0.11073966324329376,
239
+ "learning_rate": 1.0305368692688174e-05,
240
+ "loss": 0.0904,
241
+ "step": 330
242
+ },
243
+ {
244
+ "epoch": 1.4655172413793103,
245
+ "grad_norm": 0.12246192991733551,
246
+ "learning_rate": 8.930309757836517e-06,
247
+ "loss": 0.086,
248
+ "step": 340
249
+ },
250
+ {
251
+ "epoch": 1.5086206896551724,
252
+ "grad_norm": 0.11951974779367447,
253
+ "learning_rate": 7.633540738525066e-06,
254
+ "loss": 0.0883,
255
+ "step": 350
256
+ },
257
+ {
258
+ "epoch": 1.5517241379310345,
259
+ "grad_norm": 0.11673244833946228,
260
+ "learning_rate": 6.421379363065142e-06,
261
+ "loss": 0.0868,
262
+ "step": 360
263
+ },
264
+ {
265
+ "epoch": 1.5948275862068966,
266
+ "grad_norm": 0.11525051295757294,
267
+ "learning_rate": 5.299731159831953e-06,
268
+ "loss": 0.0889,
269
+ "step": 370
270
+ },
271
+ {
272
+ "epoch": 1.6379310344827587,
273
+ "grad_norm": 0.16884970664978027,
274
+ "learning_rate": 4.274060686123959e-06,
275
+ "loss": 0.0905,
276
+ "step": 380
277
+ },
278
+ {
279
+ "epoch": 1.6810344827586206,
280
+ "grad_norm": 0.12776394188404083,
281
+ "learning_rate": 3.3493649053890326e-06,
282
+ "loss": 0.0905,
283
+ "step": 390
284
+ },
285
+ {
286
+ "epoch": 1.7241379310344827,
287
+ "grad_norm": 0.13251963257789612,
288
+ "learning_rate": 2.5301488425208296e-06,
289
+ "loss": 0.0862,
290
+ "step": 400
291
+ },
292
+ {
293
+ "epoch": 1.7672413793103448,
294
+ "grad_norm": 0.14248506724834442,
295
+ "learning_rate": 1.8204036358303173e-06,
296
+ "loss": 0.09,
297
+ "step": 410
298
+ },
299
+ {
300
+ "epoch": 1.8103448275862069,
301
+ "grad_norm": 0.10634557157754898,
302
+ "learning_rate": 1.2235870926211619e-06,
303
+ "loss": 0.0839,
304
+ "step": 420
305
+ },
306
+ {
307
+ "epoch": 1.853448275862069,
308
+ "grad_norm": 0.12421922385692596,
309
+ "learning_rate": 7.426068431000882e-07,
310
+ "loss": 0.088,
311
+ "step": 430
312
+ },
313
+ {
314
+ "epoch": 1.896551724137931,
315
+ "grad_norm": 0.12003956735134125,
316
+ "learning_rate": 3.7980617469479953e-07,
317
+ "loss": 0.0932,
318
+ "step": 440
319
+ },
320
+ {
321
+ "epoch": 1.9396551724137931,
322
+ "grad_norm": 0.10775435715913773,
323
+ "learning_rate": 1.3695261579316777e-07,
324
+ "loss": 0.0842,
325
+ "step": 450
326
+ },
327
+ {
328
+ "epoch": 1.9827586206896552,
329
+ "grad_norm": 0.13052473962306976,
330
+ "learning_rate": 1.522932452260595e-08,
331
+ "loss": 0.0894,
332
+ "step": 460
333
+ },
334
+ {
335
+ "epoch": 2.0,
336
+ "step": 464,
337
+ "total_flos": 2.4087255599087616e+17,
338
+ "train_loss": 0.11968068110531774,
339
+ "train_runtime": 7304.8453,
340
+ "train_samples_per_second": 1.016,
341
+ "train_steps_per_second": 0.064
342
+ }
343
+ ],
344
+ "logging_steps": 10,
345
+ "max_steps": 464,
346
+ "num_input_tokens_seen": 0,
347
+ "num_train_epochs": 2,
348
+ "save_steps": 100,
349
+ "stateful_callbacks": {
350
+ "TrainerControl": {
351
+ "args": {
352
+ "should_epoch_stop": false,
353
+ "should_evaluate": false,
354
+ "should_log": false,
355
+ "should_save": true,
356
+ "should_training_stop": true
357
+ },
358
+ "attributes": {}
359
+ }
360
+ },
361
+ "total_flos": 2.4087255599087616e+17,
362
+ "train_batch_size": 4,
363
+ "trial_name": null,
364
+ "trial_params": null
365
+ }