NourEldin-Osama committed on
Commit 5dd256b · 1 Parent(s): f971d4c

Training in progress, epoch 1

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1 @@
+checkpoint-*/
config.json ADDED
@@ -0,0 +1,219 @@
+{
+  "_name_or_path": "csebuetnlp/mT5_m2o_arabic_crossSum",
+  "architectures": [
+    "MT5ForConditionalGeneration"
+  ],
+  "d_ff": 2048,
+  "d_kv": 64,
+  "d_model": 768,
+  "decoder_start_token_id": 250021,
+  "dense_act_fn": "gelu_new",
+  "dropout_rate": 0.1,
+  "eos_token_id": 1,
+  "feed_forward_proj": "gated-gelu",
+  "initializer_factor": 1.0,
+  "is_encoder_decoder": true,
+  "is_gated_act": true,
+  "layer_norm_epsilon": 1e-06,
+  "length_penalty": 0.6,
+  "max_length": 84,
+  "model_type": "mt5",
+  "num_beams": 4,
+  "num_decoder_layers": 12,
+  "num_heads": 12,
+  "num_layers": 12,
+  "output_past": true,
+  "pad_token_id": 0,
+  "relative_attention_max_distance": 128,
+  "relative_attention_num_buckets": 32,
+  "task_specific_params": {
+    "langid_map": {
+      "amharic": [
+        35,
+        "\u2581<extra_id_64>"
+      ],
+      "arabic": [
+        4,
+        "\u2581<extra_id_95>"
+      ],
+      "azerbaijani": [
+        7,
+        "\u2581<extra_id_92>"
+      ],
+      "bengali": [
+        42,
+        "\u2581<extra_id_57>"
+      ],
+      "burmese": [
+        33,
+        "\u2581<extra_id_66>"
+      ],
+      "chinese_simplified": [
+        40,
+        "\u2581<extra_id_59>"
+      ],
+      "chinese_traditional": [
+        44,
+        "\u2581<extra_id_55>"
+      ],
+      "english": [
+        30,
+        "\u2581<extra_id_69>"
+      ],
+      "french": [
+        10,
+        "\u2581<extra_id_89>"
+      ],
+      "gujarati": [
+        27,
+        "\u2581<extra_id_72>"
+      ],
+      "hausa": [
+        43,
+        "\u2581<extra_id_56>"
+      ],
+      "hindi": [
+        21,
+        "\u2581<extra_id_78>"
+      ],
+      "igbo": [
+        9,
+        "\u2581<extra_id_90>"
+      ],
+      "indonesian": [
+        1,
+        "\u2581<extra_id_98>"
+      ],
+      "japanese": [
+        37,
+        "\u2581<extra_id_62>"
+      ],
+      "kirundi": [
+        0,
+        "\u2581<extra_id_99>"
+      ],
+      "korean": [
+        29,
+        "\u2581<extra_id_70>"
+      ],
+      "kyrgyz": [
+        5,
+        "\u2581<extra_id_94>"
+      ],
+      "marathi": [
+        13,
+        "\u2581<extra_id_86>"
+      ],
+      "nepali": [
+        20,
+        "\u2581<extra_id_79>"
+      ],
+      "oromo": [
+        41,
+        "\u2581<extra_id_58>"
+      ],
+      "pashto": [
+        34,
+        "\u2581<extra_id_65>"
+      ],
+      "persian": [
+        23,
+        "\u2581<extra_id_76>"
+      ],
+      "pidgin": [
+        14,
+        "\u2581<extra_id_85>"
+      ],
+      "portuguese": [
+        39,
+        "\u2581<extra_id_60>"
+      ],
+      "punjabi": [
+        17,
+        "\u2581<extra_id_82>"
+      ],
+      "russian": [
+        36,
+        "\u2581<extra_id_63>"
+      ],
+      "scottish_gaelic": [
+        24,
+        "\u2581<extra_id_75>"
+      ],
+      "serbian_cyrillic": [
+        28,
+        "\u2581<extra_id_71>"
+      ],
+      "serbian_latin": [
+        11,
+        "\u2581<extra_id_88>"
+      ],
+      "sinhala": [
+        31,
+        "\u2581<extra_id_68>"
+      ],
+      "somali": [
+        19,
+        "\u2581<extra_id_80>"
+      ],
+      "spanish": [
+        3,
+        "\u2581<extra_id_96>"
+      ],
+      "swahili": [
+        18,
+        "\u2581<extra_id_81>"
+      ],
+      "tamil": [
+        32,
+        "\u2581<extra_id_67>"
+      ],
+      "telugu": [
+        22,
+        "\u2581<extra_id_77>"
+      ],
+      "thai": [
+        6,
+        "\u2581<extra_id_93>"
+      ],
+      "tigrinya": [
+        16,
+        "\u2581<extra_id_83>"
+      ],
+      "turkish": [
+        15,
+        "\u2581<extra_id_84>"
+      ],
+      "ukrainian": [
+        2,
+        "\u2581<extra_id_97>"
+      ],
+      "urdu": [
+        38,
+        "\u2581<extra_id_61>"
+      ],
+      "uzbek": [
+        8,
+        "\u2581<extra_id_91>"
+      ],
+      "vietnamese": [
+        12,
+        "\u2581<extra_id_87>"
+      ],
+      "welsh": [
+        26,
+        "\u2581<extra_id_73>"
+      ],
+      "yoruba": [
+        25,
+        "\u2581<extra_id_74>"
+      ]
+    }
+  },
+  "tie_word_embeddings": false,
+  "tokenizer_class": "T5Tokenizer",
+  "torch_dtype": "float32",
+  "transformers_version": "4.30.2",
+  "use_cache": true,
+  "vocab_size": 250112
+}
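
Editor's note: config.json carries the generation defaults shipped with the upstream checkpoint (num_beams 4, length_penalty 0.6, max_length 84) plus a langid_map of 45 sentinel tokens, one per source language; decoder_start_token_id 250021 starts decoding from the Arabic sentinel, which is what makes this a many-to-one (m2o) Arabic summarizer. A minimal usage sketch, following the loading convention from the upstream csebuetnlp/CrossSum model card; the local path "." and the 512-token truncation are assumptions, not values from this commit:

    from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

    model_path = "."  # assumed: a local clone of this repository
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_path)

    # CrossSum convention: start the decoder with the target language's
    # langid sentinel, looked up via task_specific_params["langid_map"].
    def get_lang_id(lang: str) -> int:
        return tokenizer._convert_token_to_id(
            model.config.task_specific_params["langid_map"][lang][1]
        )

    article = "..."  # source text in any of the 45 supported languages
    input_ids = tokenizer(
        article, return_tensors="pt", truncation=True, max_length=512
    )["input_ids"]
    output_ids = model.generate(
        input_ids=input_ids,
        decoder_start_token_id=get_lang_id("arabic"),
        max_length=84,      # matches the defaults recorded in config.json
        num_beams=4,
        length_penalty=0.6,
    )
    print(tokenizer.decode(output_ids[0], skip_special_tokens=True))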
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:83874ad0cc423c0ab26ef0854bdf51fd023fcba69869589958e68eadf7f296f2
+size 2329702453
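
Editor's note: the three lines above are not the 2.3 GB weights file itself but a Git LFS pointer stub, the same kind of stub the new tokenizer.json rule in .gitattributes produces: a spec version, the SHA-256 of the real blob, and its size in bytes. A small sketch for reading such a stub, assuming the checkout still holds the pointer rather than the resolved binary (i.e. git lfs pull has not run):

    def parse_lfs_pointer(path: str) -> dict[str, str]:
        """Split each 'key value' line of a Git LFS pointer file."""
        fields = {}
        with open(path, encoding="utf-8") as f:
            for line in f:
                key, _, value = line.strip().partition(" ")
                fields[key] = value
        return fields

    ptr = parse_lfs_pointer("pytorch_model.bin")
    assert ptr["version"] == "https://git-lfs.github.com/spec/v1"
    print(ptr["oid"], int(ptr["size"]))  # the sha256 digest and 2329702453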
special_tokens_map.json ADDED
@@ -0,0 +1,52 @@
+{
+  "additional_special_tokens": [
+    "▁<extra_id_64>",
+    "▁<extra_id_95>",
+    "▁<extra_id_92>",
+    "▁<extra_id_57>",
+    "▁<extra_id_66>",
+    "▁<extra_id_59>",
+    "▁<extra_id_55>",
+    "▁<extra_id_69>",
+    "▁<extra_id_89>",
+    "▁<extra_id_72>",
+    "▁<extra_id_56>",
+    "▁<extra_id_78>",
+    "▁<extra_id_90>",
+    "▁<extra_id_98>",
+    "▁<extra_id_62>",
+    "▁<extra_id_99>",
+    "▁<extra_id_70>",
+    "▁<extra_id_94>",
+    "▁<extra_id_86>",
+    "▁<extra_id_79>",
+    "▁<extra_id_58>",
+    "▁<extra_id_65>",
+    "▁<extra_id_76>",
+    "▁<extra_id_85>",
+    "▁<extra_id_60>",
+    "▁<extra_id_82>",
+    "▁<extra_id_63>",
+    "▁<extra_id_75>",
+    "▁<extra_id_71>",
+    "▁<extra_id_88>",
+    "▁<extra_id_68>",
+    "▁<extra_id_80>",
+    "▁<extra_id_96>",
+    "▁<extra_id_81>",
+    "▁<extra_id_67>",
+    "▁<extra_id_77>",
+    "▁<extra_id_93>",
+    "▁<extra_id_83>",
+    "▁<extra_id_84>",
+    "▁<extra_id_97>",
+    "▁<extra_id_61>",
+    "▁<extra_id_91>",
+    "▁<extra_id_87>",
+    "▁<extra_id_73>",
+    "▁<extra_id_74>"
+  ],
+  "eos_token": "</s>",
+  "pad_token": "<pad>",
+  "unk_token": "<unk>"
+}
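
Editor's note: these additional_special_tokens are the same 45 langid sentinels listed in config.json's langid_map, registered so the tokenizer treats each as a single piece. A quick consistency check; the local path is an assumption, and whether the Arabic sentinel's id really equals decoder_start_token_id (250021) is worth verifying rather than taken on faith here:

    from transformers import AutoTokenizer

    tok = AutoTokenizer.from_pretrained(".")  # assumed local clone of this repo
    arabic_sentinel = "\u2581<extra_id_95>"   # the "arabic" langid_map entry
    print(tok.convert_tokens_to_ids(arabic_sentinel))
    # Expected to print 250021 (decoder_start_token_id) if the usual
    # CrossSum setup holds for this checkpoint.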
spiece.model ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ef78f86560d809067d12bac6c09f19a462cb3af3f54d2b8acbba26e1433125d6
+size 4309802
tokenizer.json ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:af9b3ace1bbc6d9c245bb3de1c6b3615ade8e946290cf9b08c215ab1255de412
+size 16339151
tokenizer_config.json ADDED
@@ -0,0 +1,11 @@
+{
+  "additional_special_tokens": null,
+  "clean_up_tokenization_spaces": true,
+  "eos_token": "</s>",
+  "extra_ids": 0,
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "<pad>",
+  "sp_model_kwargs": {},
+  "tokenizer_class": "T5Tokenizer",
+  "unk_token": "<unk>"
+}
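
Editor's note: two details in tokenizer_config.json deserve a mention. extra_ids is 0 because the sentinel tokens arrive via additional_special_tokens rather than being regenerated, and model_max_length is the transformers "no limit recorded" sentinel (int(1e30), which prints as the long integer above), so callers must cap input length themselves. A sketch of doing that explicitly; the 512 cap is an assumption, not a value stored in this commit:

    from transformers import AutoTokenizer

    tok = AutoTokenizer.from_pretrained(".")  # assumed local clone
    # model_max_length is a placeholder, not a usable limit, so pass an
    # explicit cap when encoding.
    batch = tok(
        "some source text", truncation=True, max_length=512,
        return_tensors="pt",
    )
    print(batch["input_ids"].shape)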
training_args.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:79b2dcc3502ea8ff1d8dfa0ed8958092f6a320ce3c8c5aa93b53e47b0f576eec
+size 4091
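
Editor's note: training_args.bin holds the pickled TrainingArguments that the transformers Trainer writes next to its checkpoints; it records hyperparameters, not weights. A sketch for inspecting it, noting that torch.load unpickles arbitrary objects and should only be pointed at trusted files:

    import torch

    # weights_only=False is needed on recent torch versions because the
    # file contains a pickled TrainingArguments object, not a tensor dict.
    args = torch.load("training_args.bin", weights_only=False)
    print(args.num_train_epochs, args.learning_rate)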