NourEldin-Osama committed on
Commit 5dd256b · 1 Parent(s): f971d4c

Training in progress, epoch 1

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1 @@
+checkpoint-*/
config.json ADDED
@@ -0,0 +1,219 @@
+{
+  "_name_or_path": "csebuetnlp/mT5_m2o_arabic_crossSum",
+  "architectures": [
+    "MT5ForConditionalGeneration"
+  ],
+  "d_ff": 2048,
+  "d_kv": 64,
+  "d_model": 768,
+  "decoder_start_token_id": 250021,
+  "dense_act_fn": "gelu_new",
+  "dropout_rate": 0.1,
+  "eos_token_id": 1,
+  "feed_forward_proj": "gated-gelu",
+  "initializer_factor": 1.0,
+  "is_encoder_decoder": true,
+  "is_gated_act": true,
+  "layer_norm_epsilon": 1e-06,
+  "length_penalty": 0.6,
+  "max_length": 84,
+  "model_type": "mt5",
+  "num_beams": 4,
+  "num_decoder_layers": 12,
+  "num_heads": 12,
+  "num_layers": 12,
+  "output_past": true,
+  "pad_token_id": 0,
+  "relative_attention_max_distance": 128,
+  "relative_attention_num_buckets": 32,
+  "task_specific_params": {
+    "langid_map": {
+      "amharic": [
+        35,
+        "\u2581<extra_id_64>"
+      ],
+      "arabic": [
+        4,
+        "\u2581<extra_id_95>"
+      ],
+      "azerbaijani": [
+        7,
+        "\u2581<extra_id_92>"
+      ],
+      "bengali": [
+        42,
+        "\u2581<extra_id_57>"
+      ],
+      "burmese": [
+        33,
+        "\u2581<extra_id_66>"
+      ],
+      "chinese_simplified": [
+        40,
+        "\u2581<extra_id_59>"
+      ],
+      "chinese_traditional": [
+        44,
+        "\u2581<extra_id_55>"
+      ],
+      "english": [
+        30,
+        "\u2581<extra_id_69>"
+      ],
+      "french": [
+        10,
+        "\u2581<extra_id_89>"
+      ],
+      "gujarati": [
+        27,
+        "\u2581<extra_id_72>"
+      ],
+      "hausa": [
+        43,
+        "\u2581<extra_id_56>"
+      ],
+      "hindi": [
+        21,
+        "\u2581<extra_id_78>"
+      ],
+      "igbo": [
+        9,
+        "\u2581<extra_id_90>"
+      ],
+      "indonesian": [
+        1,
+        "\u2581<extra_id_98>"
+      ],
+      "japanese": [
+        37,
+        "\u2581<extra_id_62>"
+      ],
+      "kirundi": [
+        0,
+        "\u2581<extra_id_99>"
+      ],
+      "korean": [
+        29,
+        "\u2581<extra_id_70>"
+      ],
+      "kyrgyz": [
+        5,
+        "\u2581<extra_id_94>"
+      ],
+      "marathi": [
+        13,
+        "\u2581<extra_id_86>"
+      ],
+      "nepali": [
+        20,
+        "\u2581<extra_id_79>"
+      ],
+      "oromo": [
+        41,
+        "\u2581<extra_id_58>"
+      ],
+      "pashto": [
+        34,
+        "\u2581<extra_id_65>"
+      ],
+      "persian": [
+        23,
+        "\u2581<extra_id_76>"
+      ],
+      "pidgin": [
+        14,
+        "\u2581<extra_id_85>"
+      ],
+      "portuguese": [
+        39,
+        "\u2581<extra_id_60>"
+      ],
+      "punjabi": [
+        17,
+        "\u2581<extra_id_82>"
+      ],
+      "russian": [
+        36,
+        "\u2581<extra_id_63>"
+      ],
+      "scottish_gaelic": [
+        24,
+        "\u2581<extra_id_75>"
+      ],
+      "serbian_cyrillic": [
+        28,
+        "\u2581<extra_id_71>"
+      ],
+      "serbian_latin": [
+        11,
+        "\u2581<extra_id_88>"
+      ],
+      "sinhala": [
+        31,
+        "\u2581<extra_id_68>"
+      ],
+      "somali": [
+        19,
+        "\u2581<extra_id_80>"
+      ],
+      "spanish": [
+        3,
+        "\u2581<extra_id_96>"
+      ],
+      "swahili": [
+        18,
+        "\u2581<extra_id_81>"
+      ],
+      "tamil": [
+        32,
+        "\u2581<extra_id_67>"
+      ],
+      "telugu": [
+        22,
+        "\u2581<extra_id_77>"
+      ],
+      "thai": [
+        6,
+        "\u2581<extra_id_93>"
+      ],
+      "tigrinya": [
+        16,
+        "\u2581<extra_id_83>"
+      ],
+      "turkish": [
+        15,
+        "\u2581<extra_id_84>"
+      ],
+      "ukrainian": [
+        2,
+        "\u2581<extra_id_97>"
+      ],
+      "urdu": [
+        38,
+        "\u2581<extra_id_61>"
+      ],
+      "uzbek": [
+        8,
+        "\u2581<extra_id_91>"
+      ],
+      "vietnamese": [
+        12,
+        "\u2581<extra_id_87>"
+      ],
+      "welsh": [
+        26,
+        "\u2581<extra_id_73>"
+      ],
+      "yoruba": [
+        25,
+        "\u2581<extra_id_74>"
+      ]
+    }
+  },
+  "tie_word_embeddings": false,
+  "tokenizer_class": "T5Tokenizer",
+  "torch_dtype": "float32",
+  "transformers_version": "4.30.2",
+  "use_cache": true,
+  "vocab_size": 250112
+}
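
Editor's note: config.json carries the generation defaults shipped with the upstream checkpoint (num_beams 4, length_penalty 0.6, max_length 84) plus a langid_map of 45 sentinel tokens, one per source language; decoder_start_token_id 250021 starts decoding from the Arabic sentinel, which is what makes this a many-to-one (m2o) Arabic summarizer. A minimal usage sketch, following the loading convention from the upstream csebuetnlp/CrossSum model card; the local path "." and the 512-token truncation are assumptions, not values from this commit:

    from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

    model_path = "."  # assumed: a local clone of this repository
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_path)

    # CrossSum convention: start the decoder with the target language's
    # langid sentinel, looked up via task_specific_params["langid_map"].
    def get_lang_id(lang: str) -> int:
        return tokenizer._convert_token_to_id(
            model.config.task_specific_params["langid_map"][lang][1]
        )

    article = "..."  # source text in any of the 45 supported languages
    input_ids = tokenizer(
        article, return_tensors="pt", truncation=True, max_length=512
    )["input_ids"]
    output_ids = model.generate(
        input_ids=input_ids,
        decoder_start_token_id=get_lang_id("arabic"),
        max_length=84,      # matches the defaults recorded in config.json
        num_beams=4,
        length_penalty=0.6,
    )
    print(tokenizer.decode(output_ids[0], skip_special_tokens=True))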
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:83874ad0cc423c0ab26ef0854bdf51fd023fcba69869589958e68eadf7f296f2
+size 2329702453
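
Editor's note: the three lines above are not the 2.3 GB weights file itself but a Git LFS pointer stub, the same kind of stub the new tokenizer.json rule in .gitattributes produces: a spec version, the SHA-256 of the real blob, and its size in bytes. A small sketch for reading such a stub, assuming the checkout still holds the pointer rather than the resolved binary (i.e. git lfs pull has not run):

    def parse_lfs_pointer(path: str) -> dict[str, str]:
        """Split each 'key value' line of a Git LFS pointer file."""
        fields = {}
        with open(path, encoding="utf-8") as f:
            for line in f:
                key, _, value = line.strip().partition(" ")
                fields[key] = value
        return fields

    ptr = parse_lfs_pointer("pytorch_model.bin")
    assert ptr["version"] == "https://git-lfs.github.com/spec/v1"
    print(ptr["oid"], int(ptr["size"]))  # the sha256 digest and 2329702453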
special_tokens_map.json ADDED
@@ -0,0 +1,52 @@
+{
+  "additional_special_tokens": [
+    "▁<extra_id_64>",
+    "▁<extra_id_95>",
+    "▁<extra_id_92>",
+    "▁<extra_id_57>",
+    "▁<extra_id_66>",
+    "▁<extra_id_59>",
+    "▁<extra_id_55>",
+    "▁<extra_id_69>",
+    "▁<extra_id_89>",
+    "▁<extra_id_72>",
+    "▁<extra_id_56>",
+    "▁<extra_id_78>",
+    "▁<extra_id_90>",
+    "▁<extra_id_98>",
+    "▁<extra_id_62>",
+    "▁<extra_id_99>",
+    "▁<extra_id_70>",
+    "▁<extra_id_94>",
+    "▁<extra_id_86>",
+    "▁<extra_id_79>",
+    "▁<extra_id_58>",
+    "▁<extra_id_65>",
+    "▁<extra_id_76>",
+    "▁<extra_id_85>",
+    "▁<extra_id_60>",
+    "▁<extra_id_82>",
+    "▁<extra_id_63>",
+    "▁<extra_id_75>",
+    "▁<extra_id_71>",
+    "▁<extra_id_88>",
+    "▁<extra_id_68>",
+    "▁<extra_id_80>",
+    "▁<extra_id_96>",
+    "▁<extra_id_81>",
+    "▁<extra_id_67>",
+    "▁<extra_id_77>",
+    "▁<extra_id_93>",
+    "▁<extra_id_83>",
+    "▁<extra_id_84>",
+    "▁<extra_id_97>",
+    "▁<extra_id_61>",
+    "▁<extra_id_91>",
+    "▁<extra_id_87>",
+    "▁<extra_id_73>",
+    "▁<extra_id_74>"
+  ],
+  "eos_token": "</s>",
+  "pad_token": "<pad>",
+  "unk_token": "<unk>"
+}
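
Editor's note: these additional_special_tokens are the same 45 langid sentinels listed in config.json's langid_map, registered so the tokenizer treats each as a single piece. A quick consistency check; the local path is an assumption, and whether the Arabic sentinel's id really equals decoder_start_token_id (250021) is worth verifying rather than taken on faith here:

    from transformers import AutoTokenizer

    tok = AutoTokenizer.from_pretrained(".")  # assumed local clone of this repo
    arabic_sentinel = "\u2581<extra_id_95>"   # the "arabic" langid_map entry
    print(tok.convert_tokens_to_ids(arabic_sentinel))
    # Expected to print 250021 (decoder_start_token_id) if the usual
    # CrossSum setup holds for this checkpoint.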
spiece.model ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ef78f86560d809067d12bac6c09f19a462cb3af3f54d2b8acbba26e1433125d6
+size 4309802
tokenizer.json ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:af9b3ace1bbc6d9c245bb3de1c6b3615ade8e946290cf9b08c215ab1255de412
+size 16339151
tokenizer_config.json ADDED
@@ -0,0 +1,11 @@
+{
+  "additional_special_tokens": null,
+  "clean_up_tokenization_spaces": true,
+  "eos_token": "</s>",
+  "extra_ids": 0,
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "<pad>",
+  "sp_model_kwargs": {},
+  "tokenizer_class": "T5Tokenizer",
+  "unk_token": "<unk>"
+}
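
Editor's note: two details in tokenizer_config.json deserve a mention. extra_ids is 0 because the sentinel tokens arrive via additional_special_tokens rather than being regenerated, and model_max_length is the transformers "no limit recorded" sentinel (int(1e30), which prints as the long integer above), so callers must cap input length themselves. A sketch of doing that explicitly; the 512 cap is an assumption, not a value stored in this commit:

    from transformers import AutoTokenizer

    tok = AutoTokenizer.from_pretrained(".")  # assumed local clone
    # model_max_length is a placeholder, not a usable limit, so pass an
    # explicit cap when encoding.
    batch = tok(
        "some source text", truncation=True, max_length=512,
        return_tensors="pt",
    )
    print(batch["input_ids"].shape)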
training_args.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:79b2dcc3502ea8ff1d8dfa0ed8958092f6a320ce3c8c5aa93b53e47b0f576eec
+size 4091
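
Editor's note: training_args.bin holds the pickled TrainingArguments that the transformers Trainer writes next to its checkpoints; it records hyperparameters, not weights. A sketch for inspecting it, noting that torch.load unpickles arbitrary objects and should only be pointed at trusted files:

    import torch

    # weights_only=False is needed on recent torch versions because the
    # file contains a pickled TrainingArguments object, not a tensor dict.
    args = torch.load("training_args.bin", weights_only=False)
    print(args.num_train_epochs, args.learning_rate)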