diff --git a/.gitattributes b/.gitattributes index c7d9f3332a950355d5a77d85000f05e6f45435ea..e3d4fcf7e2a8735ae3efc903926a68b8238225ec 100644 --- a/.gitattributes +++ b/.gitattributes @@ -32,3 +32,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +mt5-new-summarize-final/checkpoint-1000/tokenizer.json filter=lfs diff=lfs merge=lfs -text +mt5-new-summarize-final/checkpoint-1500/tokenizer.json filter=lfs diff=lfs merge=lfs -text +mt5-new-summarize-final/checkpoint-2000/tokenizer.json filter=lfs diff=lfs merge=lfs -text +mt5-new-summarize-final/checkpoint-500/tokenizer.json filter=lfs diff=lfs merge=lfs -text +mt5-new-summarize-final/tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/mt5-new-summarize-final/checkpoint-1000/config.json b/mt5-new-summarize-final/checkpoint-1000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..139799c6bf6129096048bf33ed41a9cb2f2eb678 --- /dev/null +++ b/mt5-new-summarize-final/checkpoint-1000/config.json @@ -0,0 +1,35 @@ +{ + "_name_or_path": "google/mt5-small", + "architectures": [ + "MT5ForConditionalGeneration" + ], + "d_ff": 1024, + "d_kv": 64, + "d_model": 512, + "decoder_start_token_id": 0, + "dense_act_fn": "gelu_new", + "dropout_rate": 0.1, + "eos_token_id": 1, + "feed_forward_proj": "gated-gelu", + "initializer_factor": 1.0, + "is_encoder_decoder": true, + "is_gated_act": true, + "layer_norm_epsilon": 1e-06, + "length_penalty": 0.6, + "max_length": 128, + "model_type": "mt5", + "no_repeat_ngram_size": 2, + "num_beams": 15, + "num_decoder_layers": 8, + "num_heads": 6, + "num_layers": 8, + "pad_token_id": 0, + "relative_attention_max_distance": 128, + "relative_attention_num_buckets": 32, + "tie_word_embeddings": false, + "tokenizer_class": "T5Tokenizer", + "torch_dtype": "float32", + "transformers_version": "4.26.0", + "use_cache": true, + "vocab_size": 250112 +} diff --git a/mt5-new-summarize-final/checkpoint-1000/generation_config.json b/mt5-new-summarize-final/checkpoint-1000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a5a5137b6c89be4407a330b5240bbd35976c1380 --- /dev/null +++ b/mt5-new-summarize-final/checkpoint-1000/generation_config.json @@ -0,0 +1,11 @@ +{ + "_from_model_config": true, + "decoder_start_token_id": 0, + "eos_token_id": 1, + "length_penalty": 0.6, + "max_length": 128, + "no_repeat_ngram_size": 2, + "num_beams": 15, + "pad_token_id": 0, + "transformers_version": "4.26.0" +} diff --git a/mt5-new-summarize-final/checkpoint-1000/optimizer.pt b/mt5-new-summarize-final/checkpoint-1000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..6db4f1a04bf68f6422abfd45288acba50e5ee31f --- /dev/null +++ b/mt5-new-summarize-final/checkpoint-1000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf3baa2f0acf6564b212a6bede34f1f382b1d244d1399dc6e36525e87f19aff8 +size 2879429 diff --git a/mt5-new-summarize-final/checkpoint-1000/rng_state.pth b/mt5-new-summarize-final/checkpoint-1000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..22e1e214635672bc69cc743c4cd0330ce6a01e43 --- /dev/null +++ b/mt5-new-summarize-final/checkpoint-1000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93b72fbffc25dd7aa7d70e3be24dbad99632d4072a14d2ccf2e19ebfaa6ee482 +size 14575 diff --git a/mt5-new-summarize-final/checkpoint-1000/scheduler.pt b/mt5-new-summarize-final/checkpoint-1000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..8894c2d75e257e78914f0d2fb27723f6bb92fc04 --- /dev/null +++ b/mt5-new-summarize-final/checkpoint-1000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4eb7cf2c3437ec60e3b841c26e0417054892b0d02ce922430502dd61a0091fd1 +size 627 diff --git a/mt5-new-summarize-final/checkpoint-1000/special_tokens_map.json b/mt5-new-summarize-final/checkpoint-1000/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..79ae7ea5bf033de69d0055820c57885e3d377bbb --- /dev/null +++ b/mt5-new-summarize-final/checkpoint-1000/special_tokens_map.json @@ -0,0 +1,5 @@ +{ + "eos_token": "", + "pad_token": "", + "unk_token": "" +} diff --git a/mt5-new-summarize-final/checkpoint-1000/spiece.model b/mt5-new-summarize-final/checkpoint-1000/spiece.model new file mode 100644 index 0000000000000000000000000000000000000000..e417801865fd66bd40f9d45d46b6d0d0c2aa36b6 --- /dev/null +++ b/mt5-new-summarize-final/checkpoint-1000/spiece.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef78f86560d809067d12bac6c09f19a462cb3af3f54d2b8acbba26e1433125d6 +size 4309802 diff --git a/mt5-new-summarize-final/checkpoint-1000/tokenizer.json b/mt5-new-summarize-final/checkpoint-1000/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..db5a8773175d9750d86374dc47b64f6d55615279 --- /dev/null +++ b/mt5-new-summarize-final/checkpoint-1000/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93c3578052e1605d8332eb961bc08d72e246071974e4cc54aa6991826b802aa5 +size 16330369 diff --git a/mt5-new-summarize-final/checkpoint-1000/tokenizer_config.json b/mt5-new-summarize-final/checkpoint-1000/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4b9f3a5b62b2d1a57a16a8ae5ef280ed093e45bf --- /dev/null +++ b/mt5-new-summarize-final/checkpoint-1000/tokenizer_config.json @@ -0,0 +1,12 @@ +{ + "additional_special_tokens": null, + "eos_token": "", + "extra_ids": 0, + "model_max_length": 1000000000000000019884624838656, + "name_or_path": "google/mt5-small", + "pad_token": "", + "sp_model_kwargs": {}, + "special_tokens_map_file": "/root/.cache/huggingface/hub/models--google--mt5-small/snapshots/38f23af8ec210eb6c376d40e9c56bd25a80f195d/special_tokens_map.json", + "tokenizer_class": "T5Tokenizer", + "unk_token": "" +} diff --git a/mt5-new-summarize-final/checkpoint-1000/trainer_state.json b/mt5-new-summarize-final/checkpoint-1000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..bec000f86a766db87e473a313593e35a36ea4acf --- /dev/null +++ b/mt5-new-summarize-final/checkpoint-1000/trainer_state.json @@ -0,0 +1,628 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.426928516156576, + "global_step": 1000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 5.555555555555555e-05, + "loss": 20.8315, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001111111111111111, + "loss": 16.4875, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 0.00016666666666666666, + "loss": 10.6503, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 0.0002222222222222222, + "loss": 6.6236, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 0.0002777777777777778, + "loss": 5.2548, + "step": 50 + }, + { + "epoch": 0.03, + "learning_rate": 0.0003333333333333333, + "loss": 4.6801, + "step": 60 + }, + { + "epoch": 0.03, + "learning_rate": 0.0003888888888888889, + "loss": 4.3374, + "step": 70 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004444444444444444, + "loss": 3.9614, + "step": 80 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005, + "loss": 3.9704, + "step": 90 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004977797513321492, + "loss": 3.8112, + "step": 100 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004955595026642984, + "loss": 3.6272, + "step": 110 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004933392539964477, + "loss": 3.5572, + "step": 120 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004911190053285969, + "loss": 3.3966, + "step": 130 + }, + { + "epoch": 0.06, + "learning_rate": 0.000488898756660746, + "loss": 3.4632, + "step": 140 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004866785079928952, + "loss": 3.4591, + "step": 150 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004844582593250444, + "loss": 3.2218, + "step": 160 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004822380106571936, + "loss": 3.4101, + "step": 170 + }, + { + "epoch": 0.08, + "learning_rate": 0.00048001776198934283, + "loss": 3.2787, + "step": 180 + }, + { + "epoch": 0.08, + "learning_rate": 0.000477797513321492, + "loss": 3.1043, + "step": 190 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004755772646536412, + "loss": 3.2435, + "step": 200 + }, + { + "epoch": 0.09, + "learning_rate": 0.00047335701598579037, + "loss": 3.2286, + "step": 210 + }, + { + "epoch": 0.09, + "learning_rate": 0.00047113676731793964, + "loss": 3.1484, + "step": 220 + }, + { + "epoch": 0.1, + "learning_rate": 0.00046891651865008885, + "loss": 3.1817, + "step": 230 + }, + { + "epoch": 0.1, + "learning_rate": 0.000466696269982238, + "loss": 3.1608, + "step": 240 + }, + { + "epoch": 0.11, + "learning_rate": 0.00046447602131438723, + "loss": 3.1457, + "step": 250 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004622557726465364, + "loss": 3.1212, + "step": 260 + }, + { + "epoch": 0.12, + "learning_rate": 0.00046003552397868566, + "loss": 2.9937, + "step": 270 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004578152753108348, + "loss": 3.2031, + "step": 280 + }, + { + "epoch": 0.12, + "learning_rate": 0.00045559502664298403, + "loss": 3.0713, + "step": 290 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004533747779751332, + "loss": 3.0352, + "step": 300 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004511545293072824, + "loss": 3.0872, + "step": 310 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004489342806394316, + "loss": 3.0049, + "step": 320 + }, + { + "epoch": 0.14, + "learning_rate": 0.00044671403197158084, + "loss": 3.0659, + "step": 330 + }, + { + "epoch": 0.15, + "learning_rate": 0.00044449378330373, + "loss": 3.0852, + "step": 340 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004422735346358792, + "loss": 2.9506, + "step": 350 + }, + { + "epoch": 0.15, + "learning_rate": 0.00044005328596802843, + "loss": 3.027, + "step": 360 + }, + { + "epoch": 0.16, + "learning_rate": 0.00043783303730017764, + "loss": 2.8812, + "step": 370 + }, + { + "epoch": 0.16, + "learning_rate": 0.00043561278863232686, + "loss": 2.9738, + "step": 380 + }, + { + "epoch": 0.17, + "learning_rate": 0.000433392539964476, + "loss": 3.0504, + "step": 390 + }, + { + "epoch": 0.17, + "learning_rate": 0.00043117229129662523, + "loss": 3.0195, + "step": 400 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004289520426287744, + "loss": 2.968, + "step": 410 + }, + { + "epoch": 0.18, + "learning_rate": 0.00042673179396092366, + "loss": 3.0864, + "step": 420 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004245115452930728, + "loss": 2.8735, + "step": 430 + }, + { + "epoch": 0.19, + "learning_rate": 0.00042229129662522204, + "loss": 3.0391, + "step": 440 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004200710479573712, + "loss": 2.8288, + "step": 450 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004178507992895204, + "loss": 3.0182, + "step": 460 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004156305506216697, + "loss": 3.0606, + "step": 470 + }, + { + "epoch": 0.2, + "learning_rate": 0.00041341030195381884, + "loss": 2.9467, + "step": 480 + }, + { + "epoch": 0.21, + "learning_rate": 0.00041119005328596806, + "loss": 2.7842, + "step": 490 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004089698046181172, + "loss": 2.8472, + "step": 500 + }, + { + "epoch": 0.22, + "learning_rate": 0.00040674955595026643, + "loss": 2.7883, + "step": 510 + }, + { + "epoch": 0.22, + "learning_rate": 0.00040452930728241565, + "loss": 2.8592, + "step": 520 + }, + { + "epoch": 0.23, + "learning_rate": 0.00040230905861456486, + "loss": 2.9186, + "step": 530 + }, + { + "epoch": 0.23, + "learning_rate": 0.000400088809946714, + "loss": 2.911, + "step": 540 + }, + { + "epoch": 0.23, + "learning_rate": 0.00039786856127886324, + "loss": 2.8355, + "step": 550 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003956483126110124, + "loss": 2.9385, + "step": 560 + }, + { + "epoch": 0.24, + "learning_rate": 0.00039342806394316167, + "loss": 2.8465, + "step": 570 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003912078152753108, + "loss": 2.8408, + "step": 580 + }, + { + "epoch": 0.25, + "learning_rate": 0.00038898756660746004, + "loss": 2.767, + "step": 590 + }, + { + "epoch": 0.26, + "learning_rate": 0.00038676731793960926, + "loss": 2.9691, + "step": 600 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003845470692717584, + "loss": 2.7348, + "step": 610 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003823268206039077, + "loss": 2.7477, + "step": 620 + }, + { + "epoch": 0.27, + "learning_rate": 0.00038010657193605685, + "loss": 2.8142, + "step": 630 + }, + { + "epoch": 0.27, + "learning_rate": 0.00037788632326820606, + "loss": 2.7522, + "step": 640 + }, + { + "epoch": 0.28, + "learning_rate": 0.0003756660746003552, + "loss": 2.9403, + "step": 650 + }, + { + "epoch": 0.28, + "learning_rate": 0.00037344582593250444, + "loss": 2.8956, + "step": 660 + }, + { + "epoch": 0.29, + "learning_rate": 0.00037122557726465365, + "loss": 2.7331, + "step": 670 + }, + { + "epoch": 0.29, + "learning_rate": 0.00036900532859680287, + "loss": 2.8265, + "step": 680 + }, + { + "epoch": 0.29, + "learning_rate": 0.000366785079928952, + "loss": 2.8919, + "step": 690 + }, + { + "epoch": 0.3, + "learning_rate": 0.00036456483126110124, + "loss": 2.8362, + "step": 700 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003623445825932504, + "loss": 2.7282, + "step": 710 + }, + { + "epoch": 0.31, + "learning_rate": 0.00036012433392539967, + "loss": 2.7628, + "step": 720 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003579040852575489, + "loss": 2.8508, + "step": 730 + }, + { + "epoch": 0.32, + "learning_rate": 0.00035568383658969805, + "loss": 2.8603, + "step": 740 + }, + { + "epoch": 0.32, + "learning_rate": 0.00035346358792184726, + "loss": 2.6765, + "step": 750 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003512433392539964, + "loss": 2.8767, + "step": 760 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003490230905861457, + "loss": 2.8446, + "step": 770 + }, + { + "epoch": 0.33, + "learning_rate": 0.00034680284191829485, + "loss": 2.8524, + "step": 780 + }, + { + "epoch": 0.34, + "learning_rate": 0.00034458259325044407, + "loss": 2.778, + "step": 790 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003423623445825932, + "loss": 2.7359, + "step": 800 + }, + { + "epoch": 0.35, + "learning_rate": 0.00034014209591474244, + "loss": 2.7803, + "step": 810 + }, + { + "epoch": 0.35, + "learning_rate": 0.00033792184724689166, + "loss": 2.7631, + "step": 820 + }, + { + "epoch": 0.35, + "learning_rate": 0.00033570159857904087, + "loss": 2.7818, + "step": 830 + }, + { + "epoch": 0.36, + "learning_rate": 0.00033348134991119003, + "loss": 2.6591, + "step": 840 + }, + { + "epoch": 0.36, + "learning_rate": 0.00033126110124333925, + "loss": 2.6843, + "step": 850 + }, + { + "epoch": 0.37, + "learning_rate": 0.00032904085257548846, + "loss": 2.7299, + "step": 860 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003268206039076377, + "loss": 2.6928, + "step": 870 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003246003552397869, + "loss": 2.8512, + "step": 880 + }, + { + "epoch": 0.38, + "learning_rate": 0.00032238010657193605, + "loss": 2.7944, + "step": 890 + }, + { + "epoch": 0.38, + "learning_rate": 0.00032015985790408526, + "loss": 2.7365, + "step": 900 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003179396092362344, + "loss": 2.6992, + "step": 910 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003157193605683837, + "loss": 2.743, + "step": 920 + }, + { + "epoch": 0.4, + "learning_rate": 0.00031349911190053285, + "loss": 2.8022, + "step": 930 + }, + { + "epoch": 0.4, + "learning_rate": 0.00031127886323268207, + "loss": 2.6603, + "step": 940 + }, + { + "epoch": 0.41, + "learning_rate": 0.00030905861456483123, + "loss": 2.6277, + "step": 950 + }, + { + "epoch": 0.41, + "learning_rate": 0.00030683836589698044, + "loss": 2.7652, + "step": 960 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003046181172291297, + "loss": 2.7063, + "step": 970 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003023978685612789, + "loss": 2.6602, + "step": 980 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003001776198934281, + "loss": 2.7592, + "step": 990 + }, + { + "epoch": 0.43, + "learning_rate": 0.00029795737122557725, + "loss": 2.6628, + "step": 1000 + }, + { + "epoch": 0.43, + "eval_loss": 0.51519775390625, + "eval_rouge1": 0.8067071482496617, + "eval_rouge2": 0.7472387953801375, + "eval_rougeL": 0.7071393649432568, + "eval_rougeLsum": 0.8209607917685595, + "eval_runtime": 1807.8081, + "eval_samples_per_second": 0.553, + "eval_steps_per_second": 0.553, + "step": 1000 + } + ], + "max_steps": 2342, + "num_train_epochs": 1, + "total_flos": 2.369574390079488e+16, + "trial_name": null, + "trial_params": null +} diff --git a/mt5-new-summarize-final/checkpoint-1000/training_args.bin b/mt5-new-summarize-final/checkpoint-1000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bc9d3b4d1721cf653ac83f80538b07e6a0b8161a --- /dev/null +++ b/mt5-new-summarize-final/checkpoint-1000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:233a568dcedd606fd72b432d2c6cc678ea8c5e086885a5642ed616a886f52eb9 +size 3643 diff --git a/mt5-new-summarize-final/checkpoint-1500/config.json b/mt5-new-summarize-final/checkpoint-1500/config.json new file mode 100644 index 0000000000000000000000000000000000000000..139799c6bf6129096048bf33ed41a9cb2f2eb678 --- /dev/null +++ b/mt5-new-summarize-final/checkpoint-1500/config.json @@ -0,0 +1,35 @@ +{ + "_name_or_path": "google/mt5-small", + "architectures": [ + "MT5ForConditionalGeneration" + ], + "d_ff": 1024, + "d_kv": 64, + "d_model": 512, + "decoder_start_token_id": 0, + "dense_act_fn": "gelu_new", + "dropout_rate": 0.1, + "eos_token_id": 1, + "feed_forward_proj": "gated-gelu", + "initializer_factor": 1.0, + "is_encoder_decoder": true, + "is_gated_act": true, + "layer_norm_epsilon": 1e-06, + "length_penalty": 0.6, + "max_length": 128, + "model_type": "mt5", + "no_repeat_ngram_size": 2, + "num_beams": 15, + "num_decoder_layers": 8, + "num_heads": 6, + "num_layers": 8, + "pad_token_id": 0, + "relative_attention_max_distance": 128, + "relative_attention_num_buckets": 32, + "tie_word_embeddings": false, + "tokenizer_class": "T5Tokenizer", + "torch_dtype": "float32", + "transformers_version": "4.26.0", + "use_cache": true, + "vocab_size": 250112 +} diff --git a/mt5-new-summarize-final/checkpoint-1500/generation_config.json b/mt5-new-summarize-final/checkpoint-1500/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a5a5137b6c89be4407a330b5240bbd35976c1380 --- /dev/null +++ b/mt5-new-summarize-final/checkpoint-1500/generation_config.json @@ -0,0 +1,11 @@ +{ + "_from_model_config": true, + "decoder_start_token_id": 0, + "eos_token_id": 1, + "length_penalty": 0.6, + "max_length": 128, + "no_repeat_ngram_size": 2, + "num_beams": 15, + "pad_token_id": 0, + "transformers_version": "4.26.0" +} diff --git a/mt5-new-summarize-final/checkpoint-1500/optimizer.pt b/mt5-new-summarize-final/checkpoint-1500/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..435d9aad1390f8cb2ff5b4b744a33b147a7eaeac --- /dev/null +++ b/mt5-new-summarize-final/checkpoint-1500/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32e68e0cee8d46a585d3814147b91f8b4bc012059656c68afb9a2a293be699d9 +size 2879429 diff --git a/mt5-new-summarize-final/checkpoint-1500/rng_state.pth b/mt5-new-summarize-final/checkpoint-1500/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..48d7f2f54f3223d26a7b9af30829af0da83f869c --- /dev/null +++ b/mt5-new-summarize-final/checkpoint-1500/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7e8b74df13be81372e8f28a3be8d00e9fa6155ca3f835bba73863ea1acbfd61 +size 14575 diff --git a/mt5-new-summarize-final/checkpoint-1500/scheduler.pt b/mt5-new-summarize-final/checkpoint-1500/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..6a40c7f8d5a9a53437c392dc271964e5c67a22ac --- /dev/null +++ b/mt5-new-summarize-final/checkpoint-1500/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07335cb72d3786e62311ca2b940c12fc5ac51a973658311a4a7d90cbf94853af +size 627 diff --git a/mt5-new-summarize-final/checkpoint-1500/special_tokens_map.json b/mt5-new-summarize-final/checkpoint-1500/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..79ae7ea5bf033de69d0055820c57885e3d377bbb --- /dev/null +++ b/mt5-new-summarize-final/checkpoint-1500/special_tokens_map.json @@ -0,0 +1,5 @@ +{ + "eos_token": "", + "pad_token": "", + "unk_token": "" +} diff --git a/mt5-new-summarize-final/checkpoint-1500/spiece.model b/mt5-new-summarize-final/checkpoint-1500/spiece.model new file mode 100644 index 0000000000000000000000000000000000000000..e417801865fd66bd40f9d45d46b6d0d0c2aa36b6 --- /dev/null +++ b/mt5-new-summarize-final/checkpoint-1500/spiece.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef78f86560d809067d12bac6c09f19a462cb3af3f54d2b8acbba26e1433125d6 +size 4309802 diff --git a/mt5-new-summarize-final/checkpoint-1500/tokenizer.json b/mt5-new-summarize-final/checkpoint-1500/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..db5a8773175d9750d86374dc47b64f6d55615279 --- /dev/null +++ b/mt5-new-summarize-final/checkpoint-1500/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93c3578052e1605d8332eb961bc08d72e246071974e4cc54aa6991826b802aa5 +size 16330369 diff --git a/mt5-new-summarize-final/checkpoint-1500/tokenizer_config.json b/mt5-new-summarize-final/checkpoint-1500/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4b9f3a5b62b2d1a57a16a8ae5ef280ed093e45bf --- /dev/null +++ b/mt5-new-summarize-final/checkpoint-1500/tokenizer_config.json @@ -0,0 +1,12 @@ +{ + "additional_special_tokens": null, + "eos_token": "", + "extra_ids": 0, + "model_max_length": 1000000000000000019884624838656, + "name_or_path": "google/mt5-small", + "pad_token": "", + "sp_model_kwargs": {}, + "special_tokens_map_file": "/root/.cache/huggingface/hub/models--google--mt5-small/snapshots/38f23af8ec210eb6c376d40e9c56bd25a80f195d/special_tokens_map.json", + "tokenizer_class": "T5Tokenizer", + "unk_token": "" +} diff --git a/mt5-new-summarize-final/checkpoint-1500/trainer_state.json b/mt5-new-summarize-final/checkpoint-1500/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..853aff88b558394624d3278898a56be6c2ffec32 --- /dev/null +++ b/mt5-new-summarize-final/checkpoint-1500/trainer_state.json @@ -0,0 +1,928 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.6403927742348641, + "global_step": 1500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 5.555555555555555e-05, + "loss": 20.8315, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001111111111111111, + "loss": 16.4875, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 0.00016666666666666666, + "loss": 10.6503, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 0.0002222222222222222, + "loss": 6.6236, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 0.0002777777777777778, + "loss": 5.2548, + "step": 50 + }, + { + "epoch": 0.03, + "learning_rate": 0.0003333333333333333, + "loss": 4.6801, + "step": 60 + }, + { + "epoch": 0.03, + "learning_rate": 0.0003888888888888889, + "loss": 4.3374, + "step": 70 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004444444444444444, + "loss": 3.9614, + "step": 80 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005, + "loss": 3.9704, + "step": 90 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004977797513321492, + "loss": 3.8112, + "step": 100 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004955595026642984, + "loss": 3.6272, + "step": 110 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004933392539964477, + "loss": 3.5572, + "step": 120 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004911190053285969, + "loss": 3.3966, + "step": 130 + }, + { + "epoch": 0.06, + "learning_rate": 0.000488898756660746, + "loss": 3.4632, + "step": 140 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004866785079928952, + "loss": 3.4591, + "step": 150 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004844582593250444, + "loss": 3.2218, + "step": 160 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004822380106571936, + "loss": 3.4101, + "step": 170 + }, + { + "epoch": 0.08, + "learning_rate": 0.00048001776198934283, + "loss": 3.2787, + "step": 180 + }, + { + "epoch": 0.08, + "learning_rate": 0.000477797513321492, + "loss": 3.1043, + "step": 190 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004755772646536412, + "loss": 3.2435, + "step": 200 + }, + { + "epoch": 0.09, + "learning_rate": 0.00047335701598579037, + "loss": 3.2286, + "step": 210 + }, + { + "epoch": 0.09, + "learning_rate": 0.00047113676731793964, + "loss": 3.1484, + "step": 220 + }, + { + "epoch": 0.1, + "learning_rate": 0.00046891651865008885, + "loss": 3.1817, + "step": 230 + }, + { + "epoch": 0.1, + "learning_rate": 0.000466696269982238, + "loss": 3.1608, + "step": 240 + }, + { + "epoch": 0.11, + "learning_rate": 0.00046447602131438723, + "loss": 3.1457, + "step": 250 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004622557726465364, + "loss": 3.1212, + "step": 260 + }, + { + "epoch": 0.12, + "learning_rate": 0.00046003552397868566, + "loss": 2.9937, + "step": 270 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004578152753108348, + "loss": 3.2031, + "step": 280 + }, + { + "epoch": 0.12, + "learning_rate": 0.00045559502664298403, + "loss": 3.0713, + "step": 290 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004533747779751332, + "loss": 3.0352, + "step": 300 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004511545293072824, + "loss": 3.0872, + "step": 310 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004489342806394316, + "loss": 3.0049, + "step": 320 + }, + { + "epoch": 0.14, + "learning_rate": 0.00044671403197158084, + "loss": 3.0659, + "step": 330 + }, + { + "epoch": 0.15, + "learning_rate": 0.00044449378330373, + "loss": 3.0852, + "step": 340 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004422735346358792, + "loss": 2.9506, + "step": 350 + }, + { + "epoch": 0.15, + "learning_rate": 0.00044005328596802843, + "loss": 3.027, + "step": 360 + }, + { + "epoch": 0.16, + "learning_rate": 0.00043783303730017764, + "loss": 2.8812, + "step": 370 + }, + { + "epoch": 0.16, + "learning_rate": 0.00043561278863232686, + "loss": 2.9738, + "step": 380 + }, + { + "epoch": 0.17, + "learning_rate": 0.000433392539964476, + "loss": 3.0504, + "step": 390 + }, + { + "epoch": 0.17, + "learning_rate": 0.00043117229129662523, + "loss": 3.0195, + "step": 400 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004289520426287744, + "loss": 2.968, + "step": 410 + }, + { + "epoch": 0.18, + "learning_rate": 0.00042673179396092366, + "loss": 3.0864, + "step": 420 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004245115452930728, + "loss": 2.8735, + "step": 430 + }, + { + "epoch": 0.19, + "learning_rate": 0.00042229129662522204, + "loss": 3.0391, + "step": 440 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004200710479573712, + "loss": 2.8288, + "step": 450 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004178507992895204, + "loss": 3.0182, + "step": 460 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004156305506216697, + "loss": 3.0606, + "step": 470 + }, + { + "epoch": 0.2, + "learning_rate": 0.00041341030195381884, + "loss": 2.9467, + "step": 480 + }, + { + "epoch": 0.21, + "learning_rate": 0.00041119005328596806, + "loss": 2.7842, + "step": 490 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004089698046181172, + "loss": 2.8472, + "step": 500 + }, + { + "epoch": 0.22, + "learning_rate": 0.00040674955595026643, + "loss": 2.7883, + "step": 510 + }, + { + "epoch": 0.22, + "learning_rate": 0.00040452930728241565, + "loss": 2.8592, + "step": 520 + }, + { + "epoch": 0.23, + "learning_rate": 0.00040230905861456486, + "loss": 2.9186, + "step": 530 + }, + { + "epoch": 0.23, + "learning_rate": 0.000400088809946714, + "loss": 2.911, + "step": 540 + }, + { + "epoch": 0.23, + "learning_rate": 0.00039786856127886324, + "loss": 2.8355, + "step": 550 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003956483126110124, + "loss": 2.9385, + "step": 560 + }, + { + "epoch": 0.24, + "learning_rate": 0.00039342806394316167, + "loss": 2.8465, + "step": 570 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003912078152753108, + "loss": 2.8408, + "step": 580 + }, + { + "epoch": 0.25, + "learning_rate": 0.00038898756660746004, + "loss": 2.767, + "step": 590 + }, + { + "epoch": 0.26, + "learning_rate": 0.00038676731793960926, + "loss": 2.9691, + "step": 600 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003845470692717584, + "loss": 2.7348, + "step": 610 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003823268206039077, + "loss": 2.7477, + "step": 620 + }, + { + "epoch": 0.27, + "learning_rate": 0.00038010657193605685, + "loss": 2.8142, + "step": 630 + }, + { + "epoch": 0.27, + "learning_rate": 0.00037788632326820606, + "loss": 2.7522, + "step": 640 + }, + { + "epoch": 0.28, + "learning_rate": 0.0003756660746003552, + "loss": 2.9403, + "step": 650 + }, + { + "epoch": 0.28, + "learning_rate": 0.00037344582593250444, + "loss": 2.8956, + "step": 660 + }, + { + "epoch": 0.29, + "learning_rate": 0.00037122557726465365, + "loss": 2.7331, + "step": 670 + }, + { + "epoch": 0.29, + "learning_rate": 0.00036900532859680287, + "loss": 2.8265, + "step": 680 + }, + { + "epoch": 0.29, + "learning_rate": 0.000366785079928952, + "loss": 2.8919, + "step": 690 + }, + { + "epoch": 0.3, + "learning_rate": 0.00036456483126110124, + "loss": 2.8362, + "step": 700 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003623445825932504, + "loss": 2.7282, + "step": 710 + }, + { + "epoch": 0.31, + "learning_rate": 0.00036012433392539967, + "loss": 2.7628, + "step": 720 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003579040852575489, + "loss": 2.8508, + "step": 730 + }, + { + "epoch": 0.32, + "learning_rate": 0.00035568383658969805, + "loss": 2.8603, + "step": 740 + }, + { + "epoch": 0.32, + "learning_rate": 0.00035346358792184726, + "loss": 2.6765, + "step": 750 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003512433392539964, + "loss": 2.8767, + "step": 760 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003490230905861457, + "loss": 2.8446, + "step": 770 + }, + { + "epoch": 0.33, + "learning_rate": 0.00034680284191829485, + "loss": 2.8524, + "step": 780 + }, + { + "epoch": 0.34, + "learning_rate": 0.00034458259325044407, + "loss": 2.778, + "step": 790 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003423623445825932, + "loss": 2.7359, + "step": 800 + }, + { + "epoch": 0.35, + "learning_rate": 0.00034014209591474244, + "loss": 2.7803, + "step": 810 + }, + { + "epoch": 0.35, + "learning_rate": 0.00033792184724689166, + "loss": 2.7631, + "step": 820 + }, + { + "epoch": 0.35, + "learning_rate": 0.00033570159857904087, + "loss": 2.7818, + "step": 830 + }, + { + "epoch": 0.36, + "learning_rate": 0.00033348134991119003, + "loss": 2.6591, + "step": 840 + }, + { + "epoch": 0.36, + "learning_rate": 0.00033126110124333925, + "loss": 2.6843, + "step": 850 + }, + { + "epoch": 0.37, + "learning_rate": 0.00032904085257548846, + "loss": 2.7299, + "step": 860 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003268206039076377, + "loss": 2.6928, + "step": 870 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003246003552397869, + "loss": 2.8512, + "step": 880 + }, + { + "epoch": 0.38, + "learning_rate": 0.00032238010657193605, + "loss": 2.7944, + "step": 890 + }, + { + "epoch": 0.38, + "learning_rate": 0.00032015985790408526, + "loss": 2.7365, + "step": 900 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003179396092362344, + "loss": 2.6992, + "step": 910 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003157193605683837, + "loss": 2.743, + "step": 920 + }, + { + "epoch": 0.4, + "learning_rate": 0.00031349911190053285, + "loss": 2.8022, + "step": 930 + }, + { + "epoch": 0.4, + "learning_rate": 0.00031127886323268207, + "loss": 2.6603, + "step": 940 + }, + { + "epoch": 0.41, + "learning_rate": 0.00030905861456483123, + "loss": 2.6277, + "step": 950 + }, + { + "epoch": 0.41, + "learning_rate": 0.00030683836589698044, + "loss": 2.7652, + "step": 960 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003046181172291297, + "loss": 2.7063, + "step": 970 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003023978685612789, + "loss": 2.6602, + "step": 980 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003001776198934281, + "loss": 2.7592, + "step": 990 + }, + { + "epoch": 0.43, + "learning_rate": 0.00029795737122557725, + "loss": 2.6628, + "step": 1000 + }, + { + "epoch": 0.43, + "eval_loss": 0.51519775390625, + "eval_rouge1": 0.8067071482496617, + "eval_rouge2": 0.7472387953801375, + "eval_rougeL": 0.7071393649432568, + "eval_rougeLsum": 0.8209607917685595, + "eval_runtime": 1807.8081, + "eval_samples_per_second": 0.553, + "eval_steps_per_second": 0.553, + "step": 1000 + }, + { + "epoch": 0.43, + "learning_rate": 0.00029573712255772646, + "loss": 2.6164, + "step": 1010 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002935168738898757, + "loss": 2.6947, + "step": 1020 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002912966252220249, + "loss": 2.4547, + "step": 1030 + }, + { + "epoch": 0.44, + "learning_rate": 0.00028907637655417405, + "loss": 2.7347, + "step": 1040 + }, + { + "epoch": 0.45, + "learning_rate": 0.00028685612788632327, + "loss": 2.6491, + "step": 1050 + }, + { + "epoch": 0.45, + "learning_rate": 0.00028463587921847243, + "loss": 2.6778, + "step": 1060 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002824156305506217, + "loss": 2.7157, + "step": 1070 + }, + { + "epoch": 0.46, + "learning_rate": 0.00028019538188277086, + "loss": 2.6445, + "step": 1080 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002779751332149201, + "loss": 2.7732, + "step": 1090 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002757548845470693, + "loss": 2.6777, + "step": 1100 + }, + { + "epoch": 0.47, + "learning_rate": 0.00027353463587921845, + "loss": 2.6231, + "step": 1110 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002713143872113677, + "loss": 2.6003, + "step": 1120 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002690941385435169, + "loss": 2.7226, + "step": 1130 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002668738898756661, + "loss": 2.8623, + "step": 1140 + }, + { + "epoch": 0.49, + "learning_rate": 0.00026465364120781525, + "loss": 2.7391, + "step": 1150 + }, + { + "epoch": 0.5, + "learning_rate": 0.00026243339253996447, + "loss": 2.6836, + "step": 1160 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002602131438721137, + "loss": 2.6357, + "step": 1170 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002579928952042629, + "loss": 2.5891, + "step": 1180 + }, + { + "epoch": 0.51, + "learning_rate": 0.00025577264653641206, + "loss": 2.7383, + "step": 1190 + }, + { + "epoch": 0.51, + "learning_rate": 0.00025355239786856127, + "loss": 2.738, + "step": 1200 + }, + { + "epoch": 0.52, + "learning_rate": 0.00025133214920071043, + "loss": 2.62, + "step": 1210 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002491119005328597, + "loss": 2.5796, + "step": 1220 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002468916518650089, + "loss": 2.5744, + "step": 1230 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002446714031971581, + "loss": 2.5047, + "step": 1240 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002424511545293073, + "loss": 2.5233, + "step": 1250 + }, + { + "epoch": 0.54, + "learning_rate": 0.00024023090586145648, + "loss": 2.5248, + "step": 1260 + }, + { + "epoch": 0.54, + "learning_rate": 0.00023801065719360567, + "loss": 2.6073, + "step": 1270 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002357904085257549, + "loss": 2.596, + "step": 1280 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002335701598579041, + "loss": 2.6899, + "step": 1290 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002313499111900533, + "loss": 2.6293, + "step": 1300 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002291296625222025, + "loss": 2.639, + "step": 1310 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002269094138543517, + "loss": 2.7105, + "step": 1320 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002246891651865009, + "loss": 2.723, + "step": 1330 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002224689165186501, + "loss": 2.5697, + "step": 1340 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002202486678507993, + "loss": 2.715, + "step": 1350 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002180284191829485, + "loss": 2.6387, + "step": 1360 + }, + { + "epoch": 0.58, + "learning_rate": 0.00021580817051509768, + "loss": 2.5276, + "step": 1370 + }, + { + "epoch": 0.59, + "learning_rate": 0.0002135879218472469, + "loss": 2.627, + "step": 1380 + }, + { + "epoch": 0.59, + "learning_rate": 0.00021136767317939608, + "loss": 2.5249, + "step": 1390 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002091474245115453, + "loss": 2.629, + "step": 1400 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002069271758436945, + "loss": 2.703, + "step": 1410 + }, + { + "epoch": 0.61, + "learning_rate": 0.0002047069271758437, + "loss": 2.544, + "step": 1420 + }, + { + "epoch": 0.61, + "learning_rate": 0.0002024866785079929, + "loss": 2.5992, + "step": 1430 + }, + { + "epoch": 0.61, + "learning_rate": 0.0002002664298401421, + "loss": 2.6988, + "step": 1440 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019804618117229132, + "loss": 2.6195, + "step": 1450 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001958259325044405, + "loss": 2.6247, + "step": 1460 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001936056838365897, + "loss": 2.5853, + "step": 1470 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001913854351687389, + "loss": 2.6298, + "step": 1480 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001891651865008881, + "loss": 2.6378, + "step": 1490 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001869449378330373, + "loss": 2.5682, + "step": 1500 + } + ], + "max_steps": 2342, + "num_train_epochs": 1, + "total_flos": 3.54937584731136e+16, + "trial_name": null, + "trial_params": null +} diff --git a/mt5-new-summarize-final/checkpoint-1500/training_args.bin b/mt5-new-summarize-final/checkpoint-1500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bc9d3b4d1721cf653ac83f80538b07e6a0b8161a --- /dev/null +++ b/mt5-new-summarize-final/checkpoint-1500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:233a568dcedd606fd72b432d2c6cc678ea8c5e086885a5642ed616a886f52eb9 +size 3643 diff --git a/mt5-new-summarize-final/checkpoint-2000/config.json b/mt5-new-summarize-final/checkpoint-2000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..139799c6bf6129096048bf33ed41a9cb2f2eb678 --- /dev/null +++ b/mt5-new-summarize-final/checkpoint-2000/config.json @@ -0,0 +1,35 @@ +{ + "_name_or_path": "google/mt5-small", + "architectures": [ + "MT5ForConditionalGeneration" + ], + "d_ff": 1024, + "d_kv": 64, + "d_model": 512, + "decoder_start_token_id": 0, + "dense_act_fn": "gelu_new", + "dropout_rate": 0.1, + "eos_token_id": 1, + "feed_forward_proj": "gated-gelu", + "initializer_factor": 1.0, + "is_encoder_decoder": true, + "is_gated_act": true, + "layer_norm_epsilon": 1e-06, + "length_penalty": 0.6, + "max_length": 128, + "model_type": "mt5", + "no_repeat_ngram_size": 2, + "num_beams": 15, + "num_decoder_layers": 8, + "num_heads": 6, + "num_layers": 8, + "pad_token_id": 0, + "relative_attention_max_distance": 128, + "relative_attention_num_buckets": 32, + "tie_word_embeddings": false, + "tokenizer_class": "T5Tokenizer", + "torch_dtype": "float32", + "transformers_version": "4.26.0", + "use_cache": true, + "vocab_size": 250112 +} diff --git a/mt5-new-summarize-final/checkpoint-2000/generation_config.json b/mt5-new-summarize-final/checkpoint-2000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a5a5137b6c89be4407a330b5240bbd35976c1380 --- /dev/null +++ b/mt5-new-summarize-final/checkpoint-2000/generation_config.json @@ -0,0 +1,11 @@ +{ + "_from_model_config": true, + "decoder_start_token_id": 0, + "eos_token_id": 1, + "length_penalty": 0.6, + "max_length": 128, + "no_repeat_ngram_size": 2, + "num_beams": 15, + "pad_token_id": 0, + "transformers_version": "4.26.0" +} diff --git a/mt5-new-summarize-final/checkpoint-2000/optimizer.pt b/mt5-new-summarize-final/checkpoint-2000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..7b36bfbb297095155019d36ebd59246fc29feb2e --- /dev/null +++ b/mt5-new-summarize-final/checkpoint-2000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8398a38588a1d29c290e233a3aa7d80a655280542c57444f7f1ef146e8131d2e +size 2879429 diff --git a/mt5-new-summarize-final/checkpoint-2000/rng_state.pth b/mt5-new-summarize-final/checkpoint-2000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e3c351a5d46d6084409302c69db147eba2efe479 --- /dev/null +++ b/mt5-new-summarize-final/checkpoint-2000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f116c931fa378a37a851637fc90de078ddb5e1fd5892542c59de11eeac2cd8f3 +size 14575 diff --git a/mt5-new-summarize-final/checkpoint-2000/scheduler.pt b/mt5-new-summarize-final/checkpoint-2000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..ce6972ea0d2cd0b7126e1b9e9187ef6247ca6988 --- /dev/null +++ b/mt5-new-summarize-final/checkpoint-2000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ed71672921938371fe751b4cf233d20e73dc5785b84b089e803a193211e35ae +size 627 diff --git a/mt5-new-summarize-final/checkpoint-2000/special_tokens_map.json b/mt5-new-summarize-final/checkpoint-2000/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..79ae7ea5bf033de69d0055820c57885e3d377bbb --- /dev/null +++ b/mt5-new-summarize-final/checkpoint-2000/special_tokens_map.json @@ -0,0 +1,5 @@ +{ + "eos_token": "", + "pad_token": "", + "unk_token": "" +} diff --git a/mt5-new-summarize-final/checkpoint-2000/spiece.model b/mt5-new-summarize-final/checkpoint-2000/spiece.model new file mode 100644 index 0000000000000000000000000000000000000000..e417801865fd66bd40f9d45d46b6d0d0c2aa36b6 --- /dev/null +++ b/mt5-new-summarize-final/checkpoint-2000/spiece.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef78f86560d809067d12bac6c09f19a462cb3af3f54d2b8acbba26e1433125d6 +size 4309802 diff --git a/mt5-new-summarize-final/checkpoint-2000/tokenizer.json b/mt5-new-summarize-final/checkpoint-2000/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..db5a8773175d9750d86374dc47b64f6d55615279 --- /dev/null +++ b/mt5-new-summarize-final/checkpoint-2000/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93c3578052e1605d8332eb961bc08d72e246071974e4cc54aa6991826b802aa5 +size 16330369 diff --git a/mt5-new-summarize-final/checkpoint-2000/tokenizer_config.json b/mt5-new-summarize-final/checkpoint-2000/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4b9f3a5b62b2d1a57a16a8ae5ef280ed093e45bf --- /dev/null +++ b/mt5-new-summarize-final/checkpoint-2000/tokenizer_config.json @@ -0,0 +1,12 @@ +{ + "additional_special_tokens": null, + "eos_token": "", + "extra_ids": 0, + "model_max_length": 1000000000000000019884624838656, + "name_or_path": "google/mt5-small", + "pad_token": "", + "sp_model_kwargs": {}, + "special_tokens_map_file": "/root/.cache/huggingface/hub/models--google--mt5-small/snapshots/38f23af8ec210eb6c376d40e9c56bd25a80f195d/special_tokens_map.json", + "tokenizer_class": "T5Tokenizer", + "unk_token": "" +} diff --git a/mt5-new-summarize-final/checkpoint-2000/trainer_state.json b/mt5-new-summarize-final/checkpoint-2000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..41618abef1a59368ad2449208c94082408b79e92 --- /dev/null +++ b/mt5-new-summarize-final/checkpoint-2000/trainer_state.json @@ -0,0 +1,1240 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.853857032313152, + "global_step": 2000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 5.555555555555555e-05, + "loss": 20.8315, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001111111111111111, + "loss": 16.4875, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 0.00016666666666666666, + "loss": 10.6503, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 0.0002222222222222222, + "loss": 6.6236, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 0.0002777777777777778, + "loss": 5.2548, + "step": 50 + }, + { + "epoch": 0.03, + "learning_rate": 0.0003333333333333333, + "loss": 4.6801, + "step": 60 + }, + { + "epoch": 0.03, + "learning_rate": 0.0003888888888888889, + "loss": 4.3374, + "step": 70 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004444444444444444, + "loss": 3.9614, + "step": 80 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005, + "loss": 3.9704, + "step": 90 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004977797513321492, + "loss": 3.8112, + "step": 100 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004955595026642984, + "loss": 3.6272, + "step": 110 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004933392539964477, + "loss": 3.5572, + "step": 120 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004911190053285969, + "loss": 3.3966, + "step": 130 + }, + { + "epoch": 0.06, + "learning_rate": 0.000488898756660746, + "loss": 3.4632, + "step": 140 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004866785079928952, + "loss": 3.4591, + "step": 150 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004844582593250444, + "loss": 3.2218, + "step": 160 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004822380106571936, + "loss": 3.4101, + "step": 170 + }, + { + "epoch": 0.08, + "learning_rate": 0.00048001776198934283, + "loss": 3.2787, + "step": 180 + }, + { + "epoch": 0.08, + "learning_rate": 0.000477797513321492, + "loss": 3.1043, + "step": 190 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004755772646536412, + "loss": 3.2435, + "step": 200 + }, + { + "epoch": 0.09, + "learning_rate": 0.00047335701598579037, + "loss": 3.2286, + "step": 210 + }, + { + "epoch": 0.09, + "learning_rate": 0.00047113676731793964, + "loss": 3.1484, + "step": 220 + }, + { + "epoch": 0.1, + "learning_rate": 0.00046891651865008885, + "loss": 3.1817, + "step": 230 + }, + { + "epoch": 0.1, + "learning_rate": 0.000466696269982238, + "loss": 3.1608, + "step": 240 + }, + { + "epoch": 0.11, + "learning_rate": 0.00046447602131438723, + "loss": 3.1457, + "step": 250 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004622557726465364, + "loss": 3.1212, + "step": 260 + }, + { + "epoch": 0.12, + "learning_rate": 0.00046003552397868566, + "loss": 2.9937, + "step": 270 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004578152753108348, + "loss": 3.2031, + "step": 280 + }, + { + "epoch": 0.12, + "learning_rate": 0.00045559502664298403, + "loss": 3.0713, + "step": 290 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004533747779751332, + "loss": 3.0352, + "step": 300 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004511545293072824, + "loss": 3.0872, + "step": 310 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004489342806394316, + "loss": 3.0049, + "step": 320 + }, + { + "epoch": 0.14, + "learning_rate": 0.00044671403197158084, + "loss": 3.0659, + "step": 330 + }, + { + "epoch": 0.15, + "learning_rate": 0.00044449378330373, + "loss": 3.0852, + "step": 340 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004422735346358792, + "loss": 2.9506, + "step": 350 + }, + { + "epoch": 0.15, + "learning_rate": 0.00044005328596802843, + "loss": 3.027, + "step": 360 + }, + { + "epoch": 0.16, + "learning_rate": 0.00043783303730017764, + "loss": 2.8812, + "step": 370 + }, + { + "epoch": 0.16, + "learning_rate": 0.00043561278863232686, + "loss": 2.9738, + "step": 380 + }, + { + "epoch": 0.17, + "learning_rate": 0.000433392539964476, + "loss": 3.0504, + "step": 390 + }, + { + "epoch": 0.17, + "learning_rate": 0.00043117229129662523, + "loss": 3.0195, + "step": 400 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004289520426287744, + "loss": 2.968, + "step": 410 + }, + { + "epoch": 0.18, + "learning_rate": 0.00042673179396092366, + "loss": 3.0864, + "step": 420 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004245115452930728, + "loss": 2.8735, + "step": 430 + }, + { + "epoch": 0.19, + "learning_rate": 0.00042229129662522204, + "loss": 3.0391, + "step": 440 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004200710479573712, + "loss": 2.8288, + "step": 450 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004178507992895204, + "loss": 3.0182, + "step": 460 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004156305506216697, + "loss": 3.0606, + "step": 470 + }, + { + "epoch": 0.2, + "learning_rate": 0.00041341030195381884, + "loss": 2.9467, + "step": 480 + }, + { + "epoch": 0.21, + "learning_rate": 0.00041119005328596806, + "loss": 2.7842, + "step": 490 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004089698046181172, + "loss": 2.8472, + "step": 500 + }, + { + "epoch": 0.22, + "learning_rate": 0.00040674955595026643, + "loss": 2.7883, + "step": 510 + }, + { + "epoch": 0.22, + "learning_rate": 0.00040452930728241565, + "loss": 2.8592, + "step": 520 + }, + { + "epoch": 0.23, + "learning_rate": 0.00040230905861456486, + "loss": 2.9186, + "step": 530 + }, + { + "epoch": 0.23, + "learning_rate": 0.000400088809946714, + "loss": 2.911, + "step": 540 + }, + { + "epoch": 0.23, + "learning_rate": 0.00039786856127886324, + "loss": 2.8355, + "step": 550 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003956483126110124, + "loss": 2.9385, + "step": 560 + }, + { + "epoch": 0.24, + "learning_rate": 0.00039342806394316167, + "loss": 2.8465, + "step": 570 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003912078152753108, + "loss": 2.8408, + "step": 580 + }, + { + "epoch": 0.25, + "learning_rate": 0.00038898756660746004, + "loss": 2.767, + "step": 590 + }, + { + "epoch": 0.26, + "learning_rate": 0.00038676731793960926, + "loss": 2.9691, + "step": 600 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003845470692717584, + "loss": 2.7348, + "step": 610 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003823268206039077, + "loss": 2.7477, + "step": 620 + }, + { + "epoch": 0.27, + "learning_rate": 0.00038010657193605685, + "loss": 2.8142, + "step": 630 + }, + { + "epoch": 0.27, + "learning_rate": 0.00037788632326820606, + "loss": 2.7522, + "step": 640 + }, + { + "epoch": 0.28, + "learning_rate": 0.0003756660746003552, + "loss": 2.9403, + "step": 650 + }, + { + "epoch": 0.28, + "learning_rate": 0.00037344582593250444, + "loss": 2.8956, + "step": 660 + }, + { + "epoch": 0.29, + "learning_rate": 0.00037122557726465365, + "loss": 2.7331, + "step": 670 + }, + { + "epoch": 0.29, + "learning_rate": 0.00036900532859680287, + "loss": 2.8265, + "step": 680 + }, + { + "epoch": 0.29, + "learning_rate": 0.000366785079928952, + "loss": 2.8919, + "step": 690 + }, + { + "epoch": 0.3, + "learning_rate": 0.00036456483126110124, + "loss": 2.8362, + "step": 700 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003623445825932504, + "loss": 2.7282, + "step": 710 + }, + { + "epoch": 0.31, + "learning_rate": 0.00036012433392539967, + "loss": 2.7628, + "step": 720 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003579040852575489, + "loss": 2.8508, + "step": 730 + }, + { + "epoch": 0.32, + "learning_rate": 0.00035568383658969805, + "loss": 2.8603, + "step": 740 + }, + { + "epoch": 0.32, + "learning_rate": 0.00035346358792184726, + "loss": 2.6765, + "step": 750 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003512433392539964, + "loss": 2.8767, + "step": 760 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003490230905861457, + "loss": 2.8446, + "step": 770 + }, + { + "epoch": 0.33, + "learning_rate": 0.00034680284191829485, + "loss": 2.8524, + "step": 780 + }, + { + "epoch": 0.34, + "learning_rate": 0.00034458259325044407, + "loss": 2.778, + "step": 790 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003423623445825932, + "loss": 2.7359, + "step": 800 + }, + { + "epoch": 0.35, + "learning_rate": 0.00034014209591474244, + "loss": 2.7803, + "step": 810 + }, + { + "epoch": 0.35, + "learning_rate": 0.00033792184724689166, + "loss": 2.7631, + "step": 820 + }, + { + "epoch": 0.35, + "learning_rate": 0.00033570159857904087, + "loss": 2.7818, + "step": 830 + }, + { + "epoch": 0.36, + "learning_rate": 0.00033348134991119003, + "loss": 2.6591, + "step": 840 + }, + { + "epoch": 0.36, + "learning_rate": 0.00033126110124333925, + "loss": 2.6843, + "step": 850 + }, + { + "epoch": 0.37, + "learning_rate": 0.00032904085257548846, + "loss": 2.7299, + "step": 860 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003268206039076377, + "loss": 2.6928, + "step": 870 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003246003552397869, + "loss": 2.8512, + "step": 880 + }, + { + "epoch": 0.38, + "learning_rate": 0.00032238010657193605, + "loss": 2.7944, + "step": 890 + }, + { + "epoch": 0.38, + "learning_rate": 0.00032015985790408526, + "loss": 2.7365, + "step": 900 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003179396092362344, + "loss": 2.6992, + "step": 910 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003157193605683837, + "loss": 2.743, + "step": 920 + }, + { + "epoch": 0.4, + "learning_rate": 0.00031349911190053285, + "loss": 2.8022, + "step": 930 + }, + { + "epoch": 0.4, + "learning_rate": 0.00031127886323268207, + "loss": 2.6603, + "step": 940 + }, + { + "epoch": 0.41, + "learning_rate": 0.00030905861456483123, + "loss": 2.6277, + "step": 950 + }, + { + "epoch": 0.41, + "learning_rate": 0.00030683836589698044, + "loss": 2.7652, + "step": 960 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003046181172291297, + "loss": 2.7063, + "step": 970 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003023978685612789, + "loss": 2.6602, + "step": 980 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003001776198934281, + "loss": 2.7592, + "step": 990 + }, + { + "epoch": 0.43, + "learning_rate": 0.00029795737122557725, + "loss": 2.6628, + "step": 1000 + }, + { + "epoch": 0.43, + "eval_loss": 0.51519775390625, + "eval_rouge1": 0.8067071482496617, + "eval_rouge2": 0.7472387953801375, + "eval_rougeL": 0.7071393649432568, + "eval_rougeLsum": 0.8209607917685595, + "eval_runtime": 1807.8081, + "eval_samples_per_second": 0.553, + "eval_steps_per_second": 0.553, + "step": 1000 + }, + { + "epoch": 0.43, + "learning_rate": 0.00029573712255772646, + "loss": 2.6164, + "step": 1010 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002935168738898757, + "loss": 2.6947, + "step": 1020 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002912966252220249, + "loss": 2.4547, + "step": 1030 + }, + { + "epoch": 0.44, + "learning_rate": 0.00028907637655417405, + "loss": 2.7347, + "step": 1040 + }, + { + "epoch": 0.45, + "learning_rate": 0.00028685612788632327, + "loss": 2.6491, + "step": 1050 + }, + { + "epoch": 0.45, + "learning_rate": 0.00028463587921847243, + "loss": 2.6778, + "step": 1060 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002824156305506217, + "loss": 2.7157, + "step": 1070 + }, + { + "epoch": 0.46, + "learning_rate": 0.00028019538188277086, + "loss": 2.6445, + "step": 1080 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002779751332149201, + "loss": 2.7732, + "step": 1090 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002757548845470693, + "loss": 2.6777, + "step": 1100 + }, + { + "epoch": 0.47, + "learning_rate": 0.00027353463587921845, + "loss": 2.6231, + "step": 1110 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002713143872113677, + "loss": 2.6003, + "step": 1120 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002690941385435169, + "loss": 2.7226, + "step": 1130 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002668738898756661, + "loss": 2.8623, + "step": 1140 + }, + { + "epoch": 0.49, + "learning_rate": 0.00026465364120781525, + "loss": 2.7391, + "step": 1150 + }, + { + "epoch": 0.5, + "learning_rate": 0.00026243339253996447, + "loss": 2.6836, + "step": 1160 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002602131438721137, + "loss": 2.6357, + "step": 1170 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002579928952042629, + "loss": 2.5891, + "step": 1180 + }, + { + "epoch": 0.51, + "learning_rate": 0.00025577264653641206, + "loss": 2.7383, + "step": 1190 + }, + { + "epoch": 0.51, + "learning_rate": 0.00025355239786856127, + "loss": 2.738, + "step": 1200 + }, + { + "epoch": 0.52, + "learning_rate": 0.00025133214920071043, + "loss": 2.62, + "step": 1210 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002491119005328597, + "loss": 2.5796, + "step": 1220 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002468916518650089, + "loss": 2.5744, + "step": 1230 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002446714031971581, + "loss": 2.5047, + "step": 1240 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002424511545293073, + "loss": 2.5233, + "step": 1250 + }, + { + "epoch": 0.54, + "learning_rate": 0.00024023090586145648, + "loss": 2.5248, + "step": 1260 + }, + { + "epoch": 0.54, + "learning_rate": 0.00023801065719360567, + "loss": 2.6073, + "step": 1270 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002357904085257549, + "loss": 2.596, + "step": 1280 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002335701598579041, + "loss": 2.6899, + "step": 1290 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002313499111900533, + "loss": 2.6293, + "step": 1300 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002291296625222025, + "loss": 2.639, + "step": 1310 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002269094138543517, + "loss": 2.7105, + "step": 1320 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002246891651865009, + "loss": 2.723, + "step": 1330 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002224689165186501, + "loss": 2.5697, + "step": 1340 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002202486678507993, + "loss": 2.715, + "step": 1350 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002180284191829485, + "loss": 2.6387, + "step": 1360 + }, + { + "epoch": 0.58, + "learning_rate": 0.00021580817051509768, + "loss": 2.5276, + "step": 1370 + }, + { + "epoch": 0.59, + "learning_rate": 0.0002135879218472469, + "loss": 2.627, + "step": 1380 + }, + { + "epoch": 0.59, + "learning_rate": 0.00021136767317939608, + "loss": 2.5249, + "step": 1390 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002091474245115453, + "loss": 2.629, + "step": 1400 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002069271758436945, + "loss": 2.703, + "step": 1410 + }, + { + "epoch": 0.61, + "learning_rate": 0.0002047069271758437, + "loss": 2.544, + "step": 1420 + }, + { + "epoch": 0.61, + "learning_rate": 0.0002024866785079929, + "loss": 2.5992, + "step": 1430 + }, + { + "epoch": 0.61, + "learning_rate": 0.0002002664298401421, + "loss": 2.6988, + "step": 1440 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019804618117229132, + "loss": 2.6195, + "step": 1450 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001958259325044405, + "loss": 2.6247, + "step": 1460 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001936056838365897, + "loss": 2.5853, + "step": 1470 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001913854351687389, + "loss": 2.6298, + "step": 1480 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001891651865008881, + "loss": 2.6378, + "step": 1490 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001869449378330373, + "loss": 2.5682, + "step": 1500 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001847246891651865, + "loss": 2.4707, + "step": 1510 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018250444049733568, + "loss": 2.6558, + "step": 1520 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018028419182948492, + "loss": 2.5891, + "step": 1530 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001780639431616341, + "loss": 2.6053, + "step": 1540 + }, + { + "epoch": 0.66, + "learning_rate": 0.00017584369449378333, + "loss": 2.7265, + "step": 1550 + }, + { + "epoch": 0.67, + "learning_rate": 0.00017362344582593251, + "loss": 2.604, + "step": 1560 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001714031971580817, + "loss": 2.5988, + "step": 1570 + }, + { + "epoch": 0.67, + "learning_rate": 0.00016918294849023092, + "loss": 2.5128, + "step": 1580 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001669626998223801, + "loss": 2.7477, + "step": 1590 + }, + { + "epoch": 0.68, + "learning_rate": 0.00016474245115452932, + "loss": 2.5823, + "step": 1600 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001625222024866785, + "loss": 2.6041, + "step": 1610 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001603019538188277, + "loss": 2.6499, + "step": 1620 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001580817051509769, + "loss": 2.5358, + "step": 1630 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001558614564831261, + "loss": 2.636, + "step": 1640 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001536412078152753, + "loss": 2.6257, + "step": 1650 + }, + { + "epoch": 0.71, + "learning_rate": 0.00015142095914742453, + "loss": 2.6454, + "step": 1660 + }, + { + "epoch": 0.71, + "learning_rate": 0.00014920071047957371, + "loss": 2.563, + "step": 1670 + }, + { + "epoch": 0.72, + "learning_rate": 0.00014698046181172293, + "loss": 2.6789, + "step": 1680 + }, + { + "epoch": 0.72, + "learning_rate": 0.00014476021314387212, + "loss": 2.5691, + "step": 1690 + }, + { + "epoch": 0.73, + "learning_rate": 0.00014253996447602133, + "loss": 2.4895, + "step": 1700 + }, + { + "epoch": 0.73, + "learning_rate": 0.00014031971580817052, + "loss": 2.5423, + "step": 1710 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001380994671403197, + "loss": 2.6112, + "step": 1720 + }, + { + "epoch": 0.74, + "learning_rate": 0.00013587921847246892, + "loss": 2.5571, + "step": 1730 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001336589698046181, + "loss": 2.5789, + "step": 1740 + }, + { + "epoch": 0.75, + "learning_rate": 0.00013143872113676732, + "loss": 2.6101, + "step": 1750 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001292184724689165, + "loss": 2.591, + "step": 1760 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001269982238010657, + "loss": 2.6079, + "step": 1770 + }, + { + "epoch": 0.76, + "learning_rate": 0.00012477797513321494, + "loss": 2.6617, + "step": 1780 + }, + { + "epoch": 0.76, + "learning_rate": 0.00012255772646536413, + "loss": 2.579, + "step": 1790 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012033747779751332, + "loss": 2.594, + "step": 1800 + }, + { + "epoch": 0.77, + "learning_rate": 0.00011811722912966252, + "loss": 2.6464, + "step": 1810 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011589698046181173, + "loss": 2.6125, + "step": 1820 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011367673179396093, + "loss": 2.6031, + "step": 1830 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011145648312611012, + "loss": 2.5603, + "step": 1840 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010923623445825932, + "loss": 2.5277, + "step": 1850 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010701598579040852, + "loss": 2.6054, + "step": 1860 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010479573712255772, + "loss": 2.4357, + "step": 1870 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010257548845470694, + "loss": 2.5417, + "step": 1880 + }, + { + "epoch": 0.81, + "learning_rate": 0.00010035523978685614, + "loss": 2.5977, + "step": 1890 + }, + { + "epoch": 0.81, + "learning_rate": 9.813499111900533e-05, + "loss": 2.5966, + "step": 1900 + }, + { + "epoch": 0.82, + "learning_rate": 9.591474245115453e-05, + "loss": 2.5847, + "step": 1910 + }, + { + "epoch": 0.82, + "learning_rate": 9.369449378330373e-05, + "loss": 2.5082, + "step": 1920 + }, + { + "epoch": 0.82, + "learning_rate": 9.147424511545293e-05, + "loss": 2.6329, + "step": 1930 + }, + { + "epoch": 0.83, + "learning_rate": 8.925399644760215e-05, + "loss": 2.5748, + "step": 1940 + }, + { + "epoch": 0.83, + "learning_rate": 8.703374777975133e-05, + "loss": 2.7223, + "step": 1950 + }, + { + "epoch": 0.84, + "learning_rate": 8.481349911190053e-05, + "loss": 2.6406, + "step": 1960 + }, + { + "epoch": 0.84, + "learning_rate": 8.259325044404974e-05, + "loss": 2.5924, + "step": 1970 + }, + { + "epoch": 0.85, + "learning_rate": 8.037300177619894e-05, + "loss": 2.7957, + "step": 1980 + }, + { + "epoch": 0.85, + "learning_rate": 7.815275310834814e-05, + "loss": 2.5123, + "step": 1990 + }, + { + "epoch": 0.85, + "learning_rate": 7.593250444049734e-05, + "loss": 2.579, + "step": 2000 + }, + { + "epoch": 0.85, + "eval_loss": 0.48173585534095764, + "eval_rouge1": 0.8206909390534098, + "eval_rouge2": 0.7623428554475744, + "eval_rougeL": 0.7177491451029034, + "eval_rougeLsum": 0.8341139788232226, + "eval_runtime": 1978.7356, + "eval_samples_per_second": 0.505, + "eval_steps_per_second": 0.505, + "step": 2000 + } + ], + "max_steps": 2342, + "num_train_epochs": 1, + "total_flos": 4.738034481494016e+16, + "trial_name": null, + "trial_params": null +} diff --git a/mt5-new-summarize-final/checkpoint-2000/training_args.bin b/mt5-new-summarize-final/checkpoint-2000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bc9d3b4d1721cf653ac83f80538b07e6a0b8161a --- /dev/null +++ b/mt5-new-summarize-final/checkpoint-2000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:233a568dcedd606fd72b432d2c6cc678ea8c5e086885a5642ed616a886f52eb9 +size 3643 diff --git a/mt5-new-summarize-final/checkpoint-500/config.json b/mt5-new-summarize-final/checkpoint-500/config.json new file mode 100644 index 0000000000000000000000000000000000000000..139799c6bf6129096048bf33ed41a9cb2f2eb678 --- /dev/null +++ b/mt5-new-summarize-final/checkpoint-500/config.json @@ -0,0 +1,35 @@ +{ + "_name_or_path": "google/mt5-small", + "architectures": [ + "MT5ForConditionalGeneration" + ], + "d_ff": 1024, + "d_kv": 64, + "d_model": 512, + "decoder_start_token_id": 0, + "dense_act_fn": "gelu_new", + "dropout_rate": 0.1, + "eos_token_id": 1, + "feed_forward_proj": "gated-gelu", + "initializer_factor": 1.0, + "is_encoder_decoder": true, + "is_gated_act": true, + "layer_norm_epsilon": 1e-06, + "length_penalty": 0.6, + "max_length": 128, + "model_type": "mt5", + "no_repeat_ngram_size": 2, + "num_beams": 15, + "num_decoder_layers": 8, + "num_heads": 6, + "num_layers": 8, + "pad_token_id": 0, + "relative_attention_max_distance": 128, + "relative_attention_num_buckets": 32, + "tie_word_embeddings": false, + "tokenizer_class": "T5Tokenizer", + "torch_dtype": "float32", + "transformers_version": "4.26.0", + "use_cache": true, + "vocab_size": 250112 +} diff --git a/mt5-new-summarize-final/checkpoint-500/generation_config.json b/mt5-new-summarize-final/checkpoint-500/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5767cc0cacebfd06884eb27ae1c796d3ca829fd2 --- /dev/null +++ b/mt5-new-summarize-final/checkpoint-500/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "decoder_start_token_id": 0, + "eos_token_id": 1, + "pad_token_id": 0, + "transformers_version": "4.26.0" +} diff --git a/mt5-new-summarize-final/checkpoint-500/optimizer.pt b/mt5-new-summarize-final/checkpoint-500/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..70ead67666b71695b1f05c6d6c902ea1e59a3037 --- /dev/null +++ b/mt5-new-summarize-final/checkpoint-500/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:862e91e1511f48c42446f90fddd26797161ae5d0f9789080e8f3ed1aac89b679 +size 2879429 diff --git a/mt5-new-summarize-final/checkpoint-500/rng_state.pth b/mt5-new-summarize-final/checkpoint-500/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..3381a05a740b1a2b7ccf9b9ff292fdd72d797468 --- /dev/null +++ b/mt5-new-summarize-final/checkpoint-500/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e7569591bd924cae7a6b055850f4f744dea0b196b0796846f38580fc3438db6 +size 14575 diff --git a/mt5-new-summarize-final/checkpoint-500/scheduler.pt b/mt5-new-summarize-final/checkpoint-500/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..365730c48b2cf37eeee01f23e46d4349fd167a79 --- /dev/null +++ b/mt5-new-summarize-final/checkpoint-500/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e52e8159d86a2671fd8a649cd3c2e0d18aace8a5ba6181d11c8e8237ec341dcf +size 627 diff --git a/mt5-new-summarize-final/checkpoint-500/special_tokens_map.json b/mt5-new-summarize-final/checkpoint-500/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..79ae7ea5bf033de69d0055820c57885e3d377bbb --- /dev/null +++ b/mt5-new-summarize-final/checkpoint-500/special_tokens_map.json @@ -0,0 +1,5 @@ +{ + "eos_token": "", + "pad_token": "", + "unk_token": "" +} diff --git a/mt5-new-summarize-final/checkpoint-500/spiece.model b/mt5-new-summarize-final/checkpoint-500/spiece.model new file mode 100644 index 0000000000000000000000000000000000000000..e417801865fd66bd40f9d45d46b6d0d0c2aa36b6 --- /dev/null +++ b/mt5-new-summarize-final/checkpoint-500/spiece.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef78f86560d809067d12bac6c09f19a462cb3af3f54d2b8acbba26e1433125d6 +size 4309802 diff --git a/mt5-new-summarize-final/checkpoint-500/tokenizer.json b/mt5-new-summarize-final/checkpoint-500/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..db5a8773175d9750d86374dc47b64f6d55615279 --- /dev/null +++ b/mt5-new-summarize-final/checkpoint-500/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93c3578052e1605d8332eb961bc08d72e246071974e4cc54aa6991826b802aa5 +size 16330369 diff --git a/mt5-new-summarize-final/checkpoint-500/tokenizer_config.json b/mt5-new-summarize-final/checkpoint-500/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4b9f3a5b62b2d1a57a16a8ae5ef280ed093e45bf --- /dev/null +++ b/mt5-new-summarize-final/checkpoint-500/tokenizer_config.json @@ -0,0 +1,12 @@ +{ + "additional_special_tokens": null, + "eos_token": "", + "extra_ids": 0, + "model_max_length": 1000000000000000019884624838656, + "name_or_path": "google/mt5-small", + "pad_token": "", + "sp_model_kwargs": {}, + "special_tokens_map_file": "/root/.cache/huggingface/hub/models--google--mt5-small/snapshots/38f23af8ec210eb6c376d40e9c56bd25a80f195d/special_tokens_map.json", + "tokenizer_class": "T5Tokenizer", + "unk_token": "" +} diff --git a/mt5-new-summarize-final/checkpoint-500/trainer_state.json b/mt5-new-summarize-final/checkpoint-500/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..443fb85c6e5fccf2e397f9adde37318949c8660d --- /dev/null +++ b/mt5-new-summarize-final/checkpoint-500/trainer_state.json @@ -0,0 +1,316 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.213464258078288, + "global_step": 500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 5.555555555555555e-05, + "loss": 20.8315, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001111111111111111, + "loss": 16.4875, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 0.00016666666666666666, + "loss": 10.6503, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 0.0002222222222222222, + "loss": 6.6236, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 0.0002777777777777778, + "loss": 5.2548, + "step": 50 + }, + { + "epoch": 0.03, + "learning_rate": 0.0003333333333333333, + "loss": 4.6801, + "step": 60 + }, + { + "epoch": 0.03, + "learning_rate": 0.0003888888888888889, + "loss": 4.3374, + "step": 70 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004444444444444444, + "loss": 3.9614, + "step": 80 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005, + "loss": 3.9704, + "step": 90 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004977797513321492, + "loss": 3.8112, + "step": 100 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004955595026642984, + "loss": 3.6272, + "step": 110 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004933392539964477, + "loss": 3.5572, + "step": 120 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004911190053285969, + "loss": 3.3966, + "step": 130 + }, + { + "epoch": 0.06, + "learning_rate": 0.000488898756660746, + "loss": 3.4632, + "step": 140 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004866785079928952, + "loss": 3.4591, + "step": 150 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004844582593250444, + "loss": 3.2218, + "step": 160 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004822380106571936, + "loss": 3.4101, + "step": 170 + }, + { + "epoch": 0.08, + "learning_rate": 0.00048001776198934283, + "loss": 3.2787, + "step": 180 + }, + { + "epoch": 0.08, + "learning_rate": 0.000477797513321492, + "loss": 3.1043, + "step": 190 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004755772646536412, + "loss": 3.2435, + "step": 200 + }, + { + "epoch": 0.09, + "learning_rate": 0.00047335701598579037, + "loss": 3.2286, + "step": 210 + }, + { + "epoch": 0.09, + "learning_rate": 0.00047113676731793964, + "loss": 3.1484, + "step": 220 + }, + { + "epoch": 0.1, + "learning_rate": 0.00046891651865008885, + "loss": 3.1817, + "step": 230 + }, + { + "epoch": 0.1, + "learning_rate": 0.000466696269982238, + "loss": 3.1608, + "step": 240 + }, + { + "epoch": 0.11, + "learning_rate": 0.00046447602131438723, + "loss": 3.1457, + "step": 250 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004622557726465364, + "loss": 3.1212, + "step": 260 + }, + { + "epoch": 0.12, + "learning_rate": 0.00046003552397868566, + "loss": 2.9937, + "step": 270 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004578152753108348, + "loss": 3.2031, + "step": 280 + }, + { + "epoch": 0.12, + "learning_rate": 0.00045559502664298403, + "loss": 3.0713, + "step": 290 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004533747779751332, + "loss": 3.0352, + "step": 300 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004511545293072824, + "loss": 3.0872, + "step": 310 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004489342806394316, + "loss": 3.0049, + "step": 320 + }, + { + "epoch": 0.14, + "learning_rate": 0.00044671403197158084, + "loss": 3.0659, + "step": 330 + }, + { + "epoch": 0.15, + "learning_rate": 0.00044449378330373, + "loss": 3.0852, + "step": 340 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004422735346358792, + "loss": 2.9506, + "step": 350 + }, + { + "epoch": 0.15, + "learning_rate": 0.00044005328596802843, + "loss": 3.027, + "step": 360 + }, + { + "epoch": 0.16, + "learning_rate": 0.00043783303730017764, + "loss": 2.8812, + "step": 370 + }, + { + "epoch": 0.16, + "learning_rate": 0.00043561278863232686, + "loss": 2.9738, + "step": 380 + }, + { + "epoch": 0.17, + "learning_rate": 0.000433392539964476, + "loss": 3.0504, + "step": 390 + }, + { + "epoch": 0.17, + "learning_rate": 0.00043117229129662523, + "loss": 3.0195, + "step": 400 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004289520426287744, + "loss": 2.968, + "step": 410 + }, + { + "epoch": 0.18, + "learning_rate": 0.00042673179396092366, + "loss": 3.0864, + "step": 420 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004245115452930728, + "loss": 2.8735, + "step": 430 + }, + { + "epoch": 0.19, + "learning_rate": 0.00042229129662522204, + "loss": 3.0391, + "step": 440 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004200710479573712, + "loss": 2.8288, + "step": 450 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004178507992895204, + "loss": 3.0182, + "step": 460 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004156305506216697, + "loss": 3.0606, + "step": 470 + }, + { + "epoch": 0.2, + "learning_rate": 0.00041341030195381884, + "loss": 2.9467, + "step": 480 + }, + { + "epoch": 0.21, + "learning_rate": 0.00041119005328596806, + "loss": 2.7842, + "step": 490 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004089698046181172, + "loss": 2.8472, + "step": 500 + } + ], + "max_steps": 2342, + "num_train_epochs": 1, + "total_flos": 1.18055554516992e+16, + "trial_name": null, + "trial_params": null +} diff --git a/mt5-new-summarize-final/checkpoint-500/training_args.bin b/mt5-new-summarize-final/checkpoint-500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bc9d3b4d1721cf653ac83f80538b07e6a0b8161a --- /dev/null +++ b/mt5-new-summarize-final/checkpoint-500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:233a568dcedd606fd72b432d2c6cc678ea8c5e086885a5642ed616a886f52eb9 +size 3643 diff --git a/mt5-new-summarize-final/config.json b/mt5-new-summarize-final/config.json new file mode 100644 index 0000000000000000000000000000000000000000..139799c6bf6129096048bf33ed41a9cb2f2eb678 --- /dev/null +++ b/mt5-new-summarize-final/config.json @@ -0,0 +1,35 @@ +{ + "_name_or_path": "google/mt5-small", + "architectures": [ + "MT5ForConditionalGeneration" + ], + "d_ff": 1024, + "d_kv": 64, + "d_model": 512, + "decoder_start_token_id": 0, + "dense_act_fn": "gelu_new", + "dropout_rate": 0.1, + "eos_token_id": 1, + "feed_forward_proj": "gated-gelu", + "initializer_factor": 1.0, + "is_encoder_decoder": true, + "is_gated_act": true, + "layer_norm_epsilon": 1e-06, + "length_penalty": 0.6, + "max_length": 128, + "model_type": "mt5", + "no_repeat_ngram_size": 2, + "num_beams": 15, + "num_decoder_layers": 8, + "num_heads": 6, + "num_layers": 8, + "pad_token_id": 0, + "relative_attention_max_distance": 128, + "relative_attention_num_buckets": 32, + "tie_word_embeddings": false, + "tokenizer_class": "T5Tokenizer", + "torch_dtype": "float32", + "transformers_version": "4.26.0", + "use_cache": true, + "vocab_size": 250112 +} diff --git a/mt5-new-summarize-final/generation_config.json b/mt5-new-summarize-final/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a5a5137b6c89be4407a330b5240bbd35976c1380 --- /dev/null +++ b/mt5-new-summarize-final/generation_config.json @@ -0,0 +1,11 @@ +{ + "_from_model_config": true, + "decoder_start_token_id": 0, + "eos_token_id": 1, + "length_penalty": 0.6, + "max_length": 128, + "no_repeat_ngram_size": 2, + "num_beams": 15, + "pad_token_id": 0, + "transformers_version": "4.26.0" +} diff --git a/mt5-new-summarize-final/pytorch_model.bin b/mt5-new-summarize-final/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..6a3743160db14ff57e76b06366f1679e8d58a72e --- /dev/null +++ b/mt5-new-summarize-final/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2902dce4d66552f797fdd41548dcbc0682bf2ab91938e503552557c0d0f7cb22 +size 1200772485 diff --git a/mt5-new-summarize-final/runs/Jan29_13-58-29_d0795309bd3b/1675000709.4761653/events.out.tfevents.1675000709.d0795309bd3b.4670.1 b/mt5-new-summarize-final/runs/Jan29_13-58-29_d0795309bd3b/1675000709.4761653/events.out.tfevents.1675000709.d0795309bd3b.4670.1 new file mode 100644 index 0000000000000000000000000000000000000000..791133eb3fd8ef804976527f140086a30e908bc7 --- /dev/null +++ b/mt5-new-summarize-final/runs/Jan29_13-58-29_d0795309bd3b/1675000709.4761653/events.out.tfevents.1675000709.d0795309bd3b.4670.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aad27ec134114b9f9f4fd9a2e390c618677f1ec53f9636eed716424805f0d91f +size 5988 diff --git a/mt5-new-summarize-final/runs/Jan29_13-58-29_d0795309bd3b/events.out.tfevents.1675000709.d0795309bd3b.4670.0 b/mt5-new-summarize-final/runs/Jan29_13-58-29_d0795309bd3b/events.out.tfevents.1675000709.d0795309bd3b.4670.0 new file mode 100644 index 0000000000000000000000000000000000000000..9be6a18c50d9699b7a6c58f1506a8ac3b6794ccb --- /dev/null +++ b/mt5-new-summarize-final/runs/Jan29_13-58-29_d0795309bd3b/events.out.tfevents.1675000709.d0795309bd3b.4670.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7ef820c793c89b6e6c4e554c2a7eb554edfb58be6f33776d7ad79c242fa8da8 +size 42224 diff --git a/mt5-new-summarize-final/runs/Jan29_13-58-29_d0795309bd3b/events.out.tfevents.1675016428.d0795309bd3b.4670.2 b/mt5-new-summarize-final/runs/Jan29_13-58-29_d0795309bd3b/events.out.tfevents.1675016428.d0795309bd3b.4670.2 new file mode 100644 index 0000000000000000000000000000000000000000..4da9a4dc1a532d27dcdb7565cf4c4fbc390a9599 --- /dev/null +++ b/mt5-new-summarize-final/runs/Jan29_13-58-29_d0795309bd3b/events.out.tfevents.1675016428.d0795309bd3b.4670.2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:539dd1948a8a60fbe98568cd0687fe881902c35784926e7915855bac1ddc7d21 +size 514 diff --git a/mt5-new-summarize-final/special_tokens_map.json b/mt5-new-summarize-final/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..79ae7ea5bf033de69d0055820c57885e3d377bbb --- /dev/null +++ b/mt5-new-summarize-final/special_tokens_map.json @@ -0,0 +1,5 @@ +{ + "eos_token": "", + "pad_token": "", + "unk_token": "" +} diff --git a/mt5-new-summarize-final/spiece.model b/mt5-new-summarize-final/spiece.model new file mode 100644 index 0000000000000000000000000000000000000000..e417801865fd66bd40f9d45d46b6d0d0c2aa36b6 --- /dev/null +++ b/mt5-new-summarize-final/spiece.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef78f86560d809067d12bac6c09f19a462cb3af3f54d2b8acbba26e1433125d6 +size 4309802 diff --git a/mt5-new-summarize-final/tokenizer.json b/mt5-new-summarize-final/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..db5a8773175d9750d86374dc47b64f6d55615279 --- /dev/null +++ b/mt5-new-summarize-final/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93c3578052e1605d8332eb961bc08d72e246071974e4cc54aa6991826b802aa5 +size 16330369 diff --git a/mt5-new-summarize-final/tokenizer_config.json b/mt5-new-summarize-final/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4b9f3a5b62b2d1a57a16a8ae5ef280ed093e45bf --- /dev/null +++ b/mt5-new-summarize-final/tokenizer_config.json @@ -0,0 +1,12 @@ +{ + "additional_special_tokens": null, + "eos_token": "", + "extra_ids": 0, + "model_max_length": 1000000000000000019884624838656, + "name_or_path": "google/mt5-small", + "pad_token": "", + "sp_model_kwargs": {}, + "special_tokens_map_file": "/root/.cache/huggingface/hub/models--google--mt5-small/snapshots/38f23af8ec210eb6c376d40e9c56bd25a80f195d/special_tokens_map.json", + "tokenizer_class": "T5Tokenizer", + "unk_token": "" +} diff --git a/mt5-new-summarize-final/training_args.bin b/mt5-new-summarize-final/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bc9d3b4d1721cf653ac83f80538b07e6a0b8161a --- /dev/null +++ b/mt5-new-summarize-final/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:233a568dcedd606fd72b432d2c6cc678ea8c5e086885a5642ed616a886f52eb9 +size 3643