dmitry-vorobiev
committed on
Commit
·
a993fba
1
Parent(s):
5c346e9
upd weights: 99% ria, title=36
Browse files
- README.md +5 -6
- config.json +5 -3
- pytorch_model.bin +1 -1
- tokenizer_config.json +1 -1
README.md
CHANGED
@@ -12,7 +12,7 @@ license: MIT
|
|
12 |
|
13 |
## Description
|
14 |
*bert2bert* model, initialized with the `DeepPavlov/rubert-base-cased` pretrained weights and
|
15 |
-
fine-tuned on the first
|
16 |
|
17 |
## Usage example
|
18 |
|
@@ -35,7 +35,7 @@ encoded_batch = tokenizer.prepare_seq2seq_batch(
|
|
35 |
|
36 |
output_ids = model.generate(
|
37 |
input_ids=encoded_batch["input_ids"],
|
38 |
-
max_length=
|
39 |
no_repeat_ngram_size=3,
|
40 |
num_beams=5,
|
41 |
top_k=0
|
@@ -80,7 +80,6 @@ python nlp_headline_rus/src/train_seq2seq.py \
|
|
80 |
|
81 |
## Validation results
|
82 |
|
83 |
-
- Using [last 1% of ria](https://drive.google.com/drive/folders/
|
84 |
-
- Using [
|
85 |
-
- Using [gazeta_ru
|
86 |
-
- Using [gazeta_ru val](https://drive.google.com/drive/folders/1BLiL3H0n56e8Q9jSuDgaH_3LLpmKxuVG) split
|
|
|
12 |
|
13 |
## Description
|
14 |
*bert2bert* model, initialized with the `DeepPavlov/rubert-base-cased` pretrained weights and
|
15 |
+
fine-tuned on the first 99% of ["Rossiya Segodnya" news dataset](https://github.com/RossiyaSegodnya/ria_news_dataset) for 2 epochs.
|
16 |
|
17 |
## Usage example
|
18 |
|
|
|
35 |
|
36 |
output_ids = model.generate(
|
37 |
input_ids=encoded_batch["input_ids"],
|
38 |
+
max_length=36,
|
39 |
no_repeat_ngram_size=3,
|
40 |
num_beams=5,
|
41 |
top_k=0
|
|
|
80 |
|
81 |
## Validation results
|
82 |
|
83 |
+
- Using [last 1% of ria](https://drive.google.com/drive/folders/1ztAeyb1BiLMgXwOgOJS7WMR4PGiI1q92) dataset
|
84 |
+
- Using [gazeta_ru test](https://drive.google.com/drive/folders/1CyowuRpecsLTcDbqEfmAvkCWOod58g_e) split
|
85 |
+
- Using [gazeta_ru val](https://drive.google.com/drive/folders/1XZFOXHSXLKdhzm61ceVLw3aautrdskIu) split
|
|
config.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"_name_or_path": "/kaggle/input/
|
3 |
"architectures": [
|
4 |
"EncoderDecoderModel"
|
5 |
],
|
@@ -19,6 +19,7 @@
|
|
19 |
"diversity_penalty": 0.0,
|
20 |
"do_sample": false,
|
21 |
"early_stopping": false,
|
|
|
22 |
"eos_token_id": null,
|
23 |
"finetuning_task": null,
|
24 |
"gradient_checkpointing": false,
|
@@ -74,7 +75,7 @@
|
|
74 |
"top_k": 50,
|
75 |
"top_p": 1.0,
|
76 |
"torchscript": false,
|
77 |
-
"transformers_version": "4.
|
78 |
"type_vocab_size": 2,
|
79 |
"use_bfloat16": false,
|
80 |
"use_cache": true,
|
@@ -98,6 +99,7 @@
|
|
98 |
"diversity_penalty": 0.0,
|
99 |
"do_sample": false,
|
100 |
"early_stopping": false,
|
|
|
101 |
"eos_token_id": null,
|
102 |
"finetuning_task": null,
|
103 |
"gradient_checkpointing": false,
|
@@ -153,7 +155,7 @@
|
|
153 |
"top_k": 50,
|
154 |
"top_p": 1.0,
|
155 |
"torchscript": false,
|
156 |
-
"transformers_version": "4.
|
157 |
"type_vocab_size": 2,
|
158 |
"use_bfloat16": false,
|
159 |
"use_cache": true,
|
|
|
1 |
{
|
2 |
+
"_name_or_path": "/kaggle/input/bert2bert-wexp/ep_1_v9",
|
3 |
"architectures": [
|
4 |
"EncoderDecoderModel"
|
5 |
],
|
|
|
19 |
"diversity_penalty": 0.0,
|
20 |
"do_sample": false,
|
21 |
"early_stopping": false,
|
22 |
+
"encoder_no_repeat_ngram_size": 0,
|
23 |
"eos_token_id": null,
|
24 |
"finetuning_task": null,
|
25 |
"gradient_checkpointing": false,
|
|
|
75 |
"top_k": 50,
|
76 |
"top_p": 1.0,
|
77 |
"torchscript": false,
|
78 |
+
"transformers_version": "4.3.2",
|
79 |
"type_vocab_size": 2,
|
80 |
"use_bfloat16": false,
|
81 |
"use_cache": true,
|
|
|
99 |
"diversity_penalty": 0.0,
|
100 |
"do_sample": false,
|
101 |
"early_stopping": false,
|
102 |
+
"encoder_no_repeat_ngram_size": 0,
|
103 |
"eos_token_id": null,
|
104 |
"finetuning_task": null,
|
105 |
"gradient_checkpointing": false,
|
|
|
155 |
"top_k": 50,
|
156 |
"top_p": 1.0,
|
157 |
"torchscript": false,
|
158 |
+
"transformers_version": "4.3.2",
|
159 |
"type_vocab_size": 2,
|
160 |
"use_bfloat16": false,
|
161 |
"use_cache": true,
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 827914439
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:71f249206ee2da240fc75f3b8d228ceee50861ff493ac0b6437e2509ad2754e0
|
3 |
size 827914439
|
tokenizer_config.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "special_tokens_map_file": "/kaggle/input/deeppavlov-rubertbasecased/special_tokens_map.json", "name_or_path": "/kaggle/input/
|
|
|
1 |
+
{"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "special_tokens_map_file": "/kaggle/input/deeppavlov-rubertbasecased/special_tokens_map.json", "name_or_path": "/kaggle/input/bert2bert-wexp/ep_1_v9", "do_basic_tokenize": true, "never_split": null}
|