alkiskoudounas commited on
Commit
beb4f6b
·
verified ·
1 Parent(s): 02b2b41

Upload with huggingface_hub

Browse files
Files changed (8) hide show
  1. config.json +239 -0
  2. optimizer.pt +3 -0
  3. pytorch_model.bin +3 -0
  4. rng_state.pth +3 -0
  5. scaler.pt +3 -0
  6. scheduler.pt +3 -0
  7. trainer_state.json +652 -0
  8. training_args.bin +3 -0
config.json ADDED
@@ -0,0 +1,239 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "jonatasgrosman/wav2vec2-large-xlsr-53-italian",
3
+ "activation_dropout": 0.05,
4
+ "adapter_kernel_size": 3,
5
+ "adapter_stride": 2,
6
+ "add_adapter": false,
7
+ "apply_spec_augment": true,
8
+ "architectures": [
9
+ "Wav2Vec2ForSequenceClassification"
10
+ ],
11
+ "attention_dropout": 0.1,
12
+ "bos_token_id": 1,
13
+ "classifier_proj_size": 256,
14
+ "codevector_dim": 768,
15
+ "contrastive_logits_temperature": 0.1,
16
+ "conv_bias": true,
17
+ "conv_dim": [
18
+ 512,
19
+ 512,
20
+ 512,
21
+ 512,
22
+ 512,
23
+ 512,
24
+ 512
25
+ ],
26
+ "conv_kernel": [
27
+ 10,
28
+ 3,
29
+ 3,
30
+ 3,
31
+ 3,
32
+ 2,
33
+ 2
34
+ ],
35
+ "conv_stride": [
36
+ 5,
37
+ 2,
38
+ 2,
39
+ 2,
40
+ 2,
41
+ 2,
42
+ 2
43
+ ],
44
+ "ctc_loss_reduction": "mean",
45
+ "ctc_zero_infinity": true,
46
+ "diversity_loss_weight": 0.1,
47
+ "do_stable_layer_norm": true,
48
+ "eos_token_id": 2,
49
+ "feat_extract_activation": "gelu",
50
+ "feat_extract_dropout": 0.0,
51
+ "feat_extract_norm": "layer",
52
+ "feat_proj_dropout": 0.05,
53
+ "feat_quantizer_dropout": 0.0,
54
+ "final_dropout": 0.0,
55
+ "hidden_act": "gelu",
56
+ "hidden_dropout": 0.05,
57
+ "hidden_size": 1024,
58
+ "id2label": {
59
+ "0": "cooking_query",
60
+ "1": "iot_hue_lightdim",
61
+ "10": "news_query",
62
+ "11": "music_likeness",
63
+ "12": "iot_wemo_off",
64
+ "13": "iot_hue_lighton",
65
+ "14": "iot_wemo_on",
66
+ "15": "iot_coffee",
67
+ "16": "social_post",
68
+ "17": "qa_maths",
69
+ "18": "iot_hue_lightup",
70
+ "19": "music_query",
71
+ "2": "cooking_recipe",
72
+ "20": "transport_taxi",
73
+ "21": "qa_stock",
74
+ "22": "play_radio",
75
+ "23": "play_music",
76
+ "24": "audio_volume_up",
77
+ "25": "calendar_remove",
78
+ "26": "alarm_remove",
79
+ "27": "recommendation_locations",
80
+ "28": "general_joke",
81
+ "29": "email_sendemail",
82
+ "3": "email_addcontact",
83
+ "30": "audio_volume_mute",
84
+ "31": "iot_cleaning",
85
+ "32": "social_query",
86
+ "33": "play_game",
87
+ "34": "recommendation_events",
88
+ "35": "alarm_set",
89
+ "36": "calendar_set",
90
+ "37": "audio_volume_down",
91
+ "38": "lists_createoradd",
92
+ "39": "calendar_query",
93
+ "4": "weather_query",
94
+ "40": "general_greet",
95
+ "41": "qa_factoid",
96
+ "42": "takeaway_order",
97
+ "43": "transport_query",
98
+ "44": "iot_hue_lightchange",
99
+ "45": "play_podcasts",
100
+ "46": "transport_ticket",
101
+ "47": "qa_definition",
102
+ "48": "lists_query",
103
+ "49": "email_query",
104
+ "5": "takeaway_query",
105
+ "50": "datetime_convert",
106
+ "51": "iot_hue_lightoff",
107
+ "52": "alarm_query",
108
+ "53": "audio_volume_other",
109
+ "54": "lists_remove",
110
+ "55": "datetime_query",
111
+ "56": "qa_currency",
112
+ "57": "transport_traffic",
113
+ "58": "music_dislikeness",
114
+ "59": "music_settings",
115
+ "6": "email_querycontact",
116
+ "7": "play_audiobook",
117
+ "8": "recommendation_movies",
118
+ "9": "general_quirky"
119
+ },
120
+ "initializer_range": 0.02,
121
+ "intermediate_size": 4096,
122
+ "label2id": {
123
+ "alarm_query": "52",
124
+ "alarm_remove": "26",
125
+ "alarm_set": "35",
126
+ "audio_volume_down": "37",
127
+ "audio_volume_mute": "30",
128
+ "audio_volume_other": "53",
129
+ "audio_volume_up": "24",
130
+ "calendar_query": "39",
131
+ "calendar_remove": "25",
132
+ "calendar_set": "36",
133
+ "cooking_query": "0",
134
+ "cooking_recipe": "2",
135
+ "datetime_convert": "50",
136
+ "datetime_query": "55",
137
+ "email_addcontact": "3",
138
+ "email_query": "49",
139
+ "email_querycontact": "6",
140
+ "email_sendemail": "29",
141
+ "general_greet": "40",
142
+ "general_joke": "28",
143
+ "general_quirky": "9",
144
+ "iot_cleaning": "31",
145
+ "iot_coffee": "15",
146
+ "iot_hue_lightchange": "44",
147
+ "iot_hue_lightdim": "1",
148
+ "iot_hue_lightoff": "51",
149
+ "iot_hue_lighton": "13",
150
+ "iot_hue_lightup": "18",
151
+ "iot_wemo_off": "12",
152
+ "iot_wemo_on": "14",
153
+ "lists_createoradd": "38",
154
+ "lists_query": "48",
155
+ "lists_remove": "54",
156
+ "music_dislikeness": "58",
157
+ "music_likeness": "11",
158
+ "music_query": "19",
159
+ "music_settings": "59",
160
+ "news_query": "10",
161
+ "play_audiobook": "7",
162
+ "play_game": "33",
163
+ "play_music": "23",
164
+ "play_podcasts": "45",
165
+ "play_radio": "22",
166
+ "qa_currency": "56",
167
+ "qa_definition": "47",
168
+ "qa_factoid": "41",
169
+ "qa_maths": "17",
170
+ "qa_stock": "21",
171
+ "recommendation_events": "34",
172
+ "recommendation_locations": "27",
173
+ "recommendation_movies": "8",
174
+ "social_post": "16",
175
+ "social_query": "32",
176
+ "takeaway_order": "42",
177
+ "takeaway_query": "5",
178
+ "transport_query": "43",
179
+ "transport_taxi": "20",
180
+ "transport_ticket": "46",
181
+ "transport_traffic": "57",
182
+ "weather_query": "4"
183
+ },
184
+ "layer_norm_eps": 1e-05,
185
+ "layerdrop": 0.05,
186
+ "mask_channel_length": 10,
187
+ "mask_channel_min_space": 1,
188
+ "mask_channel_other": 0.0,
189
+ "mask_channel_prob": 0.0,
190
+ "mask_channel_selection": "static",
191
+ "mask_feature_length": 10,
192
+ "mask_feature_min_masks": 0,
193
+ "mask_feature_prob": 0.0,
194
+ "mask_time_length": 10,
195
+ "mask_time_min_masks": 2,
196
+ "mask_time_min_space": 1,
197
+ "mask_time_other": 0.0,
198
+ "mask_time_prob": 0.05,
199
+ "mask_time_selection": "static",
200
+ "model_type": "wav2vec2",
201
+ "num_adapter_layers": 3,
202
+ "num_attention_heads": 16,
203
+ "num_codevector_groups": 2,
204
+ "num_codevectors_per_group": 320,
205
+ "num_conv_pos_embedding_groups": 16,
206
+ "num_conv_pos_embeddings": 128,
207
+ "num_feat_extract_layers": 7,
208
+ "num_hidden_layers": 24,
209
+ "num_negatives": 100,
210
+ "output_hidden_size": 1024,
211
+ "pad_token_id": 0,
212
+ "proj_codevector_dim": 768,
213
+ "tdnn_dilation": [
214
+ 1,
215
+ 2,
216
+ 3,
217
+ 1,
218
+ 1
219
+ ],
220
+ "tdnn_dim": [
221
+ 512,
222
+ 512,
223
+ 512,
224
+ 512,
225
+ 1500
226
+ ],
227
+ "tdnn_kernel": [
228
+ 5,
229
+ 3,
230
+ 3,
231
+ 1,
232
+ 1
233
+ ],
234
+ "torch_dtype": "float32",
235
+ "transformers_version": "4.26.0.dev0",
236
+ "use_weighted_layer_sum": false,
237
+ "vocab_size": 44,
238
+ "xvector_output_dim": 512
239
+ }
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b0308b2b01be06b9079293437bdfd77e671e51d49daeae58acb4c57389bc20f
3
+ size 2525980249
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92127f1109fbcbd91a1f28142eeb0ff19a40bb41c016066abd1745f703a0dd84
3
+ size 1263010605
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0989a07dca285a3932be517629db5ee632c087a806a2c8908d8e0a4102715645
3
+ size 14567
scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:736ac057cd1baa6f655c41cfb89657fd1618a9438af88afef35199ebb51c3f02
3
+ size 559
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2331be09e7bc668b4ed23bb51a2b651cad51f57e9082333fbf37cc11aefe1553
3
+ size 623
trainer_state.json ADDED
@@ -0,0 +1,652 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.824220746039857,
3
+ "best_model_checkpoint": "wav2vec2-large-xlsr-53-italian-ic-finetuning-hard-speaker/checkpoint-2346",
4
+ "epoch": 29.99270072992701,
5
+ "global_step": 3060,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.49,
12
+ "learning_rate": 1.6013071895424836e-05,
13
+ "loss": 4.0662,
14
+ "step": 50
15
+ },
16
+ {
17
+ "epoch": 0.97,
18
+ "learning_rate": 3.169934640522876e-05,
19
+ "loss": 3.9284,
20
+ "step": 100
21
+ },
22
+ {
23
+ "epoch": 0.99,
24
+ "eval_accuracy": 0.057230454777721,
25
+ "eval_loss": 3.828322649002075,
26
+ "eval_runtime": 29.2471,
27
+ "eval_samples_per_second": 66.913,
28
+ "eval_steps_per_second": 2.12,
29
+ "step": 102
30
+ },
31
+ {
32
+ "epoch": 1.47,
33
+ "learning_rate": 4.803921568627452e-05,
34
+ "loss": 3.8316,
35
+ "step": 150
36
+ },
37
+ {
38
+ "epoch": 1.95,
39
+ "learning_rate": 6.405228758169934e-05,
40
+ "loss": 3.6033,
41
+ "step": 200
42
+ },
43
+ {
44
+ "epoch": 1.99,
45
+ "eval_accuracy": 0.18446601941747573,
46
+ "eval_loss": 3.325474262237549,
47
+ "eval_runtime": 29.2457,
48
+ "eval_samples_per_second": 66.916,
49
+ "eval_steps_per_second": 2.12,
50
+ "step": 204
51
+ },
52
+ {
53
+ "epoch": 2.45,
54
+ "learning_rate": 8.006535947712419e-05,
55
+ "loss": 3.2263,
56
+ "step": 250
57
+ },
58
+ {
59
+ "epoch": 2.93,
60
+ "learning_rate": 9.640522875816994e-05,
61
+ "loss": 2.7337,
62
+ "step": 300
63
+ },
64
+ {
65
+ "epoch": 2.99,
66
+ "eval_accuracy": 0.455799693408278,
67
+ "eval_loss": 2.375624895095825,
68
+ "eval_runtime": 29.1511,
69
+ "eval_samples_per_second": 67.133,
70
+ "eval_steps_per_second": 2.127,
71
+ "step": 306
72
+ },
73
+ {
74
+ "epoch": 3.43,
75
+ "learning_rate": 9.85838779956427e-05,
76
+ "loss": 2.2867,
77
+ "step": 350
78
+ },
79
+ {
80
+ "epoch": 3.91,
81
+ "learning_rate": 9.67683369644154e-05,
82
+ "loss": 1.8948,
83
+ "step": 400
84
+ },
85
+ {
86
+ "epoch": 3.99,
87
+ "eval_accuracy": 0.6075625958099131,
88
+ "eval_loss": 1.6275904178619385,
89
+ "eval_runtime": 29.219,
90
+ "eval_samples_per_second": 66.977,
91
+ "eval_steps_per_second": 2.122,
92
+ "step": 408
93
+ },
94
+ {
95
+ "epoch": 4.41,
96
+ "learning_rate": 9.49527959331881e-05,
97
+ "loss": 1.5128,
98
+ "step": 450
99
+ },
100
+ {
101
+ "epoch": 4.9,
102
+ "learning_rate": 9.313725490196079e-05,
103
+ "loss": 1.3163,
104
+ "step": 500
105
+ },
106
+ {
107
+ "epoch": 4.99,
108
+ "eval_accuracy": 0.7189575881451201,
109
+ "eval_loss": 1.2288696765899658,
110
+ "eval_runtime": 29.2646,
111
+ "eval_samples_per_second": 66.872,
112
+ "eval_steps_per_second": 2.119,
113
+ "step": 510
114
+ },
115
+ {
116
+ "epoch": 5.39,
117
+ "learning_rate": 9.132171387073348e-05,
118
+ "loss": 1.1049,
119
+ "step": 550
120
+ },
121
+ {
122
+ "epoch": 5.88,
123
+ "learning_rate": 8.950617283950617e-05,
124
+ "loss": 0.9525,
125
+ "step": 600
126
+ },
127
+ {
128
+ "epoch": 5.99,
129
+ "eval_accuracy": 0.7567705671946857,
130
+ "eval_loss": 1.070212960243225,
131
+ "eval_runtime": 29.2153,
132
+ "eval_samples_per_second": 66.985,
133
+ "eval_steps_per_second": 2.122,
134
+ "step": 612
135
+ },
136
+ {
137
+ "epoch": 6.37,
138
+ "learning_rate": 8.769063180827887e-05,
139
+ "loss": 0.7733,
140
+ "step": 650
141
+ },
142
+ {
143
+ "epoch": 6.86,
144
+ "learning_rate": 8.587509077705157e-05,
145
+ "loss": 0.7063,
146
+ "step": 700
147
+ },
148
+ {
149
+ "epoch": 6.99,
150
+ "eval_accuracy": 0.7797649463464487,
151
+ "eval_loss": 0.9915740489959717,
152
+ "eval_runtime": 29.2648,
153
+ "eval_samples_per_second": 66.872,
154
+ "eval_steps_per_second": 2.119,
155
+ "step": 714
156
+ },
157
+ {
158
+ "epoch": 7.35,
159
+ "learning_rate": 8.405954974582426e-05,
160
+ "loss": 0.6261,
161
+ "step": 750
162
+ },
163
+ {
164
+ "epoch": 7.84,
165
+ "learning_rate": 8.224400871459695e-05,
166
+ "loss": 0.5398,
167
+ "step": 800
168
+ },
169
+ {
170
+ "epoch": 7.99,
171
+ "eval_accuracy": 0.7797649463464487,
172
+ "eval_loss": 0.9585044980049133,
173
+ "eval_runtime": 29.2234,
174
+ "eval_samples_per_second": 66.967,
175
+ "eval_steps_per_second": 2.122,
176
+ "step": 816
177
+ },
178
+ {
179
+ "epoch": 8.33,
180
+ "learning_rate": 8.042846768336965e-05,
181
+ "loss": 0.4993,
182
+ "step": 850
183
+ },
184
+ {
185
+ "epoch": 8.82,
186
+ "learning_rate": 7.861292665214233e-05,
187
+ "loss": 0.4401,
188
+ "step": 900
189
+ },
190
+ {
191
+ "epoch": 8.99,
192
+ "eval_accuracy": 0.7894736842105263,
193
+ "eval_loss": 0.9442699551582336,
194
+ "eval_runtime": 29.2708,
195
+ "eval_samples_per_second": 66.858,
196
+ "eval_steps_per_second": 2.118,
197
+ "step": 918
198
+ },
199
+ {
200
+ "epoch": 9.31,
201
+ "learning_rate": 7.679738562091505e-05,
202
+ "loss": 0.3974,
203
+ "step": 950
204
+ },
205
+ {
206
+ "epoch": 9.8,
207
+ "learning_rate": 7.498184458968773e-05,
208
+ "loss": 0.3377,
209
+ "step": 1000
210
+ },
211
+ {
212
+ "epoch": 9.99,
213
+ "eval_accuracy": 0.7966274910577414,
214
+ "eval_loss": 0.9571655988693237,
215
+ "eval_runtime": 29.1828,
216
+ "eval_samples_per_second": 67.06,
217
+ "eval_steps_per_second": 2.125,
218
+ "step": 1020
219
+ },
220
+ {
221
+ "epoch": 10.29,
222
+ "learning_rate": 7.316630355846043e-05,
223
+ "loss": 0.2958,
224
+ "step": 1050
225
+ },
226
+ {
227
+ "epoch": 10.78,
228
+ "learning_rate": 7.135076252723311e-05,
229
+ "loss": 0.2903,
230
+ "step": 1100
231
+ },
232
+ {
233
+ "epoch": 10.99,
234
+ "eval_accuracy": 0.8114460909555442,
235
+ "eval_loss": 0.9565660357475281,
236
+ "eval_runtime": 29.2796,
237
+ "eval_samples_per_second": 66.838,
238
+ "eval_steps_per_second": 2.118,
239
+ "step": 1122
240
+ },
241
+ {
242
+ "epoch": 11.27,
243
+ "learning_rate": 6.953522149600581e-05,
244
+ "loss": 0.27,
245
+ "step": 1150
246
+ },
247
+ {
248
+ "epoch": 11.76,
249
+ "learning_rate": 6.771968046477851e-05,
250
+ "loss": 0.2313,
251
+ "step": 1200
252
+ },
253
+ {
254
+ "epoch": 11.99,
255
+ "eval_accuracy": 0.8160449667858968,
256
+ "eval_loss": 0.9552507996559143,
257
+ "eval_runtime": 29.3904,
258
+ "eval_samples_per_second": 66.586,
259
+ "eval_steps_per_second": 2.11,
260
+ "step": 1224
261
+ },
262
+ {
263
+ "epoch": 12.25,
264
+ "learning_rate": 6.590413943355121e-05,
265
+ "loss": 0.2222,
266
+ "step": 1250
267
+ },
268
+ {
269
+ "epoch": 12.74,
270
+ "learning_rate": 6.40885984023239e-05,
271
+ "loss": 0.196,
272
+ "step": 1300
273
+ },
274
+ {
275
+ "epoch": 12.99,
276
+ "eval_accuracy": 0.7976494634644865,
277
+ "eval_loss": 1.0041111707687378,
278
+ "eval_runtime": 29.2891,
279
+ "eval_samples_per_second": 66.817,
280
+ "eval_steps_per_second": 2.117,
281
+ "step": 1326
282
+ },
283
+ {
284
+ "epoch": 13.23,
285
+ "learning_rate": 6.22730573710966e-05,
286
+ "loss": 0.186,
287
+ "step": 1350
288
+ },
289
+ {
290
+ "epoch": 13.72,
291
+ "learning_rate": 6.045751633986928e-05,
292
+ "loss": 0.189,
293
+ "step": 1400
294
+ },
295
+ {
296
+ "epoch": 13.99,
297
+ "eval_accuracy": 0.8109351047521717,
298
+ "eval_loss": 1.0098705291748047,
299
+ "eval_runtime": 29.5324,
300
+ "eval_samples_per_second": 66.266,
301
+ "eval_steps_per_second": 2.099,
302
+ "step": 1428
303
+ },
304
+ {
305
+ "epoch": 14.21,
306
+ "learning_rate": 5.8641975308641984e-05,
307
+ "loss": 0.1511,
308
+ "step": 1450
309
+ },
310
+ {
311
+ "epoch": 14.7,
312
+ "learning_rate": 5.6826434277414676e-05,
313
+ "loss": 0.1253,
314
+ "step": 1500
315
+ },
316
+ {
317
+ "epoch": 14.99,
318
+ "eval_accuracy": 0.8119570771589167,
319
+ "eval_loss": 1.030671238899231,
320
+ "eval_runtime": 30.8165,
321
+ "eval_samples_per_second": 63.505,
322
+ "eval_steps_per_second": 2.012,
323
+ "step": 1530
324
+ },
325
+ {
326
+ "epoch": 15.19,
327
+ "learning_rate": 5.501089324618737e-05,
328
+ "loss": 0.1539,
329
+ "step": 1550
330
+ },
331
+ {
332
+ "epoch": 15.68,
333
+ "learning_rate": 5.319535221496006e-05,
334
+ "loss": 0.1114,
335
+ "step": 1600
336
+ },
337
+ {
338
+ "epoch": 15.99,
339
+ "eval_accuracy": 0.8048032703117016,
340
+ "eval_loss": 1.1111302375793457,
341
+ "eval_runtime": 30.7697,
342
+ "eval_samples_per_second": 63.601,
343
+ "eval_steps_per_second": 2.015,
344
+ "step": 1632
345
+ },
346
+ {
347
+ "epoch": 16.18,
348
+ "learning_rate": 5.137981118373275e-05,
349
+ "loss": 0.1331,
350
+ "step": 1650
351
+ },
352
+ {
353
+ "epoch": 16.66,
354
+ "learning_rate": 4.956427015250545e-05,
355
+ "loss": 0.1241,
356
+ "step": 1700
357
+ },
358
+ {
359
+ "epoch": 16.99,
360
+ "eval_accuracy": 0.8109351047521717,
361
+ "eval_loss": 1.0672301054000854,
362
+ "eval_runtime": 31.1401,
363
+ "eval_samples_per_second": 62.845,
364
+ "eval_steps_per_second": 1.991,
365
+ "step": 1734
366
+ },
367
+ {
368
+ "epoch": 17.16,
369
+ "learning_rate": 4.774872912127814e-05,
370
+ "loss": 0.0923,
371
+ "step": 1750
372
+ },
373
+ {
374
+ "epoch": 17.64,
375
+ "learning_rate": 4.593318809005084e-05,
376
+ "loss": 0.1128,
377
+ "step": 1800
378
+ },
379
+ {
380
+ "epoch": 17.99,
381
+ "eval_accuracy": 0.8119570771589167,
382
+ "eval_loss": 1.092710018157959,
383
+ "eval_runtime": 31.1388,
384
+ "eval_samples_per_second": 62.848,
385
+ "eval_steps_per_second": 1.991,
386
+ "step": 1836
387
+ },
388
+ {
389
+ "epoch": 18.14,
390
+ "learning_rate": 4.411764705882353e-05,
391
+ "loss": 0.0897,
392
+ "step": 1850
393
+ },
394
+ {
395
+ "epoch": 18.62,
396
+ "learning_rate": 4.230210602759623e-05,
397
+ "loss": 0.0932,
398
+ "step": 1900
399
+ },
400
+ {
401
+ "epoch": 18.99,
402
+ "eval_accuracy": 0.8185998978027593,
403
+ "eval_loss": 1.0788525342941284,
404
+ "eval_runtime": 31.2301,
405
+ "eval_samples_per_second": 62.664,
406
+ "eval_steps_per_second": 1.985,
407
+ "step": 1938
408
+ },
409
+ {
410
+ "epoch": 19.12,
411
+ "learning_rate": 4.048656499636892e-05,
412
+ "loss": 0.0849,
413
+ "step": 1950
414
+ },
415
+ {
416
+ "epoch": 19.6,
417
+ "learning_rate": 3.8671023965141615e-05,
418
+ "loss": 0.0889,
419
+ "step": 2000
420
+ },
421
+ {
422
+ "epoch": 19.99,
423
+ "eval_accuracy": 0.8104241185487991,
424
+ "eval_loss": 1.153747320175171,
425
+ "eval_runtime": 31.9386,
426
+ "eval_samples_per_second": 61.274,
427
+ "eval_steps_per_second": 1.941,
428
+ "step": 2040
429
+ },
430
+ {
431
+ "epoch": 20.1,
432
+ "learning_rate": 3.6855482933914306e-05,
433
+ "loss": 0.0732,
434
+ "step": 2050
435
+ },
436
+ {
437
+ "epoch": 20.58,
438
+ "learning_rate": 3.5039941902687e-05,
439
+ "loss": 0.0649,
440
+ "step": 2100
441
+ },
442
+ {
443
+ "epoch": 20.99,
444
+ "eval_accuracy": 0.8088911599386817,
445
+ "eval_loss": 1.1140419244766235,
446
+ "eval_runtime": 32.7576,
447
+ "eval_samples_per_second": 59.742,
448
+ "eval_steps_per_second": 1.893,
449
+ "step": 2142
450
+ },
451
+ {
452
+ "epoch": 21.08,
453
+ "learning_rate": 3.32244008714597e-05,
454
+ "loss": 0.061,
455
+ "step": 2150
456
+ },
457
+ {
458
+ "epoch": 21.56,
459
+ "learning_rate": 3.140885984023239e-05,
460
+ "loss": 0.0645,
461
+ "step": 2200
462
+ },
463
+ {
464
+ "epoch": 21.99,
465
+ "eval_accuracy": 0.8150229943791517,
466
+ "eval_loss": 1.133326530456543,
467
+ "eval_runtime": 31.5293,
468
+ "eval_samples_per_second": 62.069,
469
+ "eval_steps_per_second": 1.966,
470
+ "step": 2244
471
+ },
472
+ {
473
+ "epoch": 22.06,
474
+ "learning_rate": 2.9593318809005084e-05,
475
+ "loss": 0.0619,
476
+ "step": 2250
477
+ },
478
+ {
479
+ "epoch": 22.55,
480
+ "learning_rate": 2.777777777777778e-05,
481
+ "loss": 0.0637,
482
+ "step": 2300
483
+ },
484
+ {
485
+ "epoch": 22.99,
486
+ "eval_accuracy": 0.824220746039857,
487
+ "eval_loss": 1.1310803890228271,
488
+ "eval_runtime": 29.6669,
489
+ "eval_samples_per_second": 65.966,
490
+ "eval_steps_per_second": 2.09,
491
+ "step": 2346
492
+ },
493
+ {
494
+ "epoch": 23.04,
495
+ "learning_rate": 2.596223674655047e-05,
496
+ "loss": 0.0453,
497
+ "step": 2350
498
+ },
499
+ {
500
+ "epoch": 23.53,
501
+ "learning_rate": 2.4183006535947712e-05,
502
+ "loss": 0.0767,
503
+ "step": 2400
504
+ },
505
+ {
506
+ "epoch": 23.99,
507
+ "eval_accuracy": 0.8165559529892693,
508
+ "eval_loss": 1.1296130418777466,
509
+ "eval_runtime": 29.5997,
510
+ "eval_samples_per_second": 66.115,
511
+ "eval_steps_per_second": 2.095,
512
+ "step": 2448
513
+ },
514
+ {
515
+ "epoch": 24.02,
516
+ "learning_rate": 2.2367465504720407e-05,
517
+ "loss": 0.0446,
518
+ "step": 2450
519
+ },
520
+ {
521
+ "epoch": 24.51,
522
+ "learning_rate": 2.0551924473493103e-05,
523
+ "loss": 0.0571,
524
+ "step": 2500
525
+ },
526
+ {
527
+ "epoch": 24.99,
528
+ "learning_rate": 1.877269426289034e-05,
529
+ "loss": 0.0457,
530
+ "step": 2550
531
+ },
532
+ {
533
+ "epoch": 24.99,
534
+ "eval_accuracy": 0.8083801737353091,
535
+ "eval_loss": 1.168392539024353,
536
+ "eval_runtime": 29.5486,
537
+ "eval_samples_per_second": 66.23,
538
+ "eval_steps_per_second": 2.098,
539
+ "step": 2550
540
+ },
541
+ {
542
+ "epoch": 25.49,
543
+ "learning_rate": 1.6957153231663036e-05,
544
+ "loss": 0.0424,
545
+ "step": 2600
546
+ },
547
+ {
548
+ "epoch": 25.97,
549
+ "learning_rate": 1.5141612200435731e-05,
550
+ "loss": 0.0405,
551
+ "step": 2650
552
+ },
553
+ {
554
+ "epoch": 25.99,
555
+ "eval_accuracy": 0.8160449667858968,
556
+ "eval_loss": 1.1332188844680786,
557
+ "eval_runtime": 29.6263,
558
+ "eval_samples_per_second": 66.056,
559
+ "eval_steps_per_second": 2.093,
560
+ "step": 2652
561
+ },
562
+ {
563
+ "epoch": 26.47,
564
+ "learning_rate": 1.3326071169208426e-05,
565
+ "loss": 0.0454,
566
+ "step": 2700
567
+ },
568
+ {
569
+ "epoch": 26.95,
570
+ "learning_rate": 1.151053013798112e-05,
571
+ "loss": 0.0362,
572
+ "step": 2750
573
+ },
574
+ {
575
+ "epoch": 26.99,
576
+ "eval_accuracy": 0.8201328564128768,
577
+ "eval_loss": 1.131385087966919,
578
+ "eval_runtime": 29.8487,
579
+ "eval_samples_per_second": 65.564,
580
+ "eval_steps_per_second": 2.077,
581
+ "step": 2754
582
+ },
583
+ {
584
+ "epoch": 27.45,
585
+ "learning_rate": 9.694989106753813e-06,
586
+ "loss": 0.0337,
587
+ "step": 2800
588
+ },
589
+ {
590
+ "epoch": 27.93,
591
+ "learning_rate": 7.879448075526508e-06,
592
+ "loss": 0.0391,
593
+ "step": 2850
594
+ },
595
+ {
596
+ "epoch": 27.99,
597
+ "eval_accuracy": 0.8211548288196219,
598
+ "eval_loss": 1.1564393043518066,
599
+ "eval_runtime": 29.8613,
600
+ "eval_samples_per_second": 65.536,
601
+ "eval_steps_per_second": 2.076,
602
+ "step": 2856
603
+ },
604
+ {
605
+ "epoch": 28.43,
606
+ "learning_rate": 6.063907044299202e-06,
607
+ "loss": 0.0435,
608
+ "step": 2900
609
+ },
610
+ {
611
+ "epoch": 28.91,
612
+ "learning_rate": 4.2483660130718954e-06,
613
+ "loss": 0.0452,
614
+ "step": 2950
615
+ },
616
+ {
617
+ "epoch": 28.99,
618
+ "eval_accuracy": 0.8221768012263669,
619
+ "eval_loss": 1.1484907865524292,
620
+ "eval_runtime": 29.975,
621
+ "eval_samples_per_second": 65.288,
622
+ "eval_steps_per_second": 2.068,
623
+ "step": 2958
624
+ },
625
+ {
626
+ "epoch": 29.41,
627
+ "learning_rate": 2.43282498184459e-06,
628
+ "loss": 0.0484,
629
+ "step": 3000
630
+ },
631
+ {
632
+ "epoch": 29.9,
633
+ "learning_rate": 6.17283950617284e-07,
634
+ "loss": 0.0343,
635
+ "step": 3050
636
+ },
637
+ {
638
+ "epoch": 29.99,
639
+ "eval_accuracy": 0.8201328564128768,
640
+ "eval_loss": 1.1464686393737793,
641
+ "eval_runtime": 29.8646,
642
+ "eval_samples_per_second": 65.529,
643
+ "eval_steps_per_second": 2.076,
644
+ "step": 3060
645
+ }
646
+ ],
647
+ "max_steps": 3060,
648
+ "num_train_epochs": 30,
649
+ "total_flos": 1.1930235995944326e+20,
650
+ "trial_name": null,
651
+ "trial_params": null
652
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40d88c79085c5cfec03e1ac580425ac76d3fe307b9edff2b4cd3cb2e6336d157
3
+ size 3567