Saving weights and logs of step 10000

Files changed:
- config.json (+4 -3)
- events.out.tfevents.1639865567.t1v-n-8eba1090-w-0.1317510.0.v2 (+3 -0)
- flax_model.msgpack (+2 -2)
- pytorch_model.bin (+3 -0)
- run_mlm_flax.py (+5 -1)
- run_wechsel.py (+5 -6)
- start_train.sh (+3 -1)
config.json
CHANGED
@@ -1,12 +1,13 @@
 {
-  "_name_or_path": "
+  "_name_or_path": "./",
   "architectures": [
-    "
+    "RobertaForMaskedLM"
   ],
   "attention_probs_dropout_prob": 0.1,
   "bos_token_id": 0,
   "classifier_dropout": null,
   "eos_token_id": 2,
+  "gradient_checkpointing": false,
   "hidden_act": "gelu",
   "hidden_dropout_prob": 0.1,
   "hidden_size": 1024,
@@ -19,7 +20,7 @@
   "num_hidden_layers": 24,
   "pad_token_id": 1,
   "position_embedding_type": "absolute",
-  "torch_dtype": "
+  "torch_dtype": "float32",
   "transformers_version": "4.13.0.dev0",
   "type_vocab_size": 1,
   "use_cache": true,
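Two of these config changes are bookkeeping written by `save_pretrained`: `torch_dtype` records the precision the PyTorch checkpoint was saved in, and `_name_or_path` now points at the local directory. As a small sketch (assuming the repo has been cloned into the current directory), the recorded values can be inspected with `AutoConfig`:

from transformers import AutoConfig

# Reads config.json from the cloned model repo.
config = AutoConfig.from_pretrained("./")
print(config.architectures)  # ["RobertaForMaskedLM"]
print(config.torch_dtype)    # the "float32" recorded above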
events.out.tfevents.1639865567.t1v-n-8eba1090-w-0.1317510.0.v2
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:90044db50f3fbfe6af4870c6eb29f5f290fbda578d2fffb342644aff2fa5a1cf
+size 1471447
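The binary artifacts in this commit (this TensorBoard event file and the model checkpoints below) are stored with Git LFS, so the repository tracks only a three-line pointer: the spec version, a sha256 oid, and the byte size, while the payload lives in LFS storage. A minimal sketch of checking a downloaded copy against its pointer, assuming the file has been fetched to the path shown:

import hashlib

# Hypothetical local path; adjust to wherever the LFS object was downloaded.
path = "events.out.tfevents.1639865567.t1v-n-8eba1090-w-0.1317510.0.v2"

sha = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        sha.update(chunk)

# Values taken from the pointer file above.
assert sha.hexdigest() == "90044db50f3fbfe6af4870c6eb29f5f290fbda578d2fffb342644aff2fa5a1cf"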
flax_model.msgpack
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:ed2451f3cd9fff0108476a448868d5478995e86c8bac4935e33645d62109b5de
+size 1421662309
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:741983451ebd3f767044f9f28f8ad4621e946e22b9dac19ea0612e304300c307
+size 1421807019
run_mlm_flax.py
CHANGED
@@ -164,6 +164,10 @@ class ModelArguments:
             "help": "Floating-point format in which the model weights should be initialized and trained. Choose one of `[float32, float16, bfloat16]`."
         },
     )
+    from_pytorch: bool = field(
+        default=False,
+        metadata={"help": "Whether to use Pytorch model checkpoint for weight initialization or not."},
+    )


 @dataclass
@@ -608,7 +612,7 @@ def main():

     if model_args.model_name_or_path:
         model = FlaxAutoModelForMaskedLM.from_pretrained(
-            model_args.model_name_or_path, config=config, seed=training_args.seed, dtype=getattr(jnp, model_args.dtype)
+            model_args.model_name_or_path, config=config, seed=training_args.seed, dtype=getattr(jnp, model_args.dtype), from_pt=model_args.from_pytorch
        )
     else:
         model = FlaxAutoModelForMaskedLM.from_config(
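The new `from_pytorch` argument simply forwards to the `from_pt` parameter of `from_pretrained`, which loads a PyTorch checkpoint (`pytorch_model.bin`) and converts its weights to Flax at load time instead of requiring `flax_model.msgpack`. A minimal sketch of the resulting call, assuming a local directory containing `config.json` and `pytorch_model.bin`:

import jax.numpy as jnp
from transformers import FlaxAutoModelForMaskedLM

# from_pt=True converts the PyTorch weights to Flax on load;
# dtype sets the floating-point format used for training (bfloat16 on TPU).
model = FlaxAutoModelForMaskedLM.from_pretrained(
    "./",
    dtype=jnp.bfloat16,
    from_pt=True,
)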
run_wechsel.py
CHANGED
@@ -1,10 +1,10 @@
 import torch
-from transformers import
+from transformers import AutoModelForMaskedLM, AutoTokenizer, FlaxAutoModelForMaskedLM
 from datasets import load_dataset
 from wechsel import WECHSEL, load_embeddings

 source_tokenizer = AutoTokenizer.from_pretrained("roberta-large")
-model =
+model = AutoModelForMaskedLM.from_pretrained("roberta-large")

 target_tokenizer = AutoTokenizer.from_pretrained("./")

@@ -20,9 +20,8 @@ target_embeddings, info = wechsel.apply(
     model.get_input_embeddings().weight.detach().numpy(),
 )

-model.get_input_embeddings().weight.data = torch.from_numpy(target_embeddings)
-
+model.get_input_embeddings().weight.data = torch.from_numpy(target_embeddings).to(torch.float32)
 model.save_pretrained("./")

-flax_model =
-flax_model.save_pretrained("./")
+# flax_model = FlaxAutoModelForMaskedLM.from_pretrained("./", from_pt=True)
+# flax_model.save_pretrained("./")
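The second hunk header shows the diff skips over the WECHSEL construction itself (roughly lines 10-19 of the script). For context, the wechsel library's documented usage follows the pattern below, ending in the `wechsel.apply(...)` call whose tail appears in the diff; the Finnish identifiers here are assumptions for this repo's target language, not text from the commit:

# Sketch of the elided WECHSEL setup, following the wechsel README;
# "fi"/"finnish" are assumed identifiers for the Finnish target language.
wechsel = WECHSEL(
    load_embeddings("en"),           # fastText embeddings for the source language
    load_embeddings("fi"),           # fastText embeddings for the target language
    bilingual_dictionary="finnish",  # word-translation dictionary shipped with wechsel
)

# Maps roberta-large's input embeddings onto the target tokenizer's vocabulary.
target_embeddings, info = wechsel.apply(
    source_tokenizer,
    target_tokenizer,
    model.get_input_embeddings().weight.detach().numpy(),
)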
start_train.sh
CHANGED
@@ -5,9 +5,10 @@ export USE_TORCH=0
 python3 run_mlm_flax.py \
     --output_dir="./" \
     --model_name_or_path="./" \
+    --from_pytorch \
     --config_name="./" \
     --tokenizer_name="./" \
-    --dataset_filepath="/researchdisk/
+    --dataset_filepath="/researchdisk/training_dataset_full_deduplicated" \
     --max_seq_length="128" \
     --pad_to_max_length \
     --preprocessing_num_workers="64" \
@@ -25,5 +26,6 @@ python3 run_mlm_flax.py \
     --eval_steps="10000" \
     --logging_steps="500" \
     --dtype="bfloat16" \
+    --adafactor \
     --push_to_hub \
     --hub_model_id="Finnish-NLP/roberta-large-wechsel-finnish"
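The added `--adafactor` flag swaps the default AdamW optimizer for Adafactor, which keeps its second-moment statistics in factored form and therefore needs much less optimizer memory for a 355M-parameter RoBERTa-large on TPU. A minimal sketch of what such a flag typically selects in a Flax training script built on optax (the schedule values below are illustrative placeholders, not this run's hyperparameters):

import optax

# Linear decay schedule; init/end values and step count are placeholders.
learning_rate_fn = optax.linear_schedule(
    init_value=1e-4, end_value=0.0, transition_steps=500_000
)

# Adafactor factorizes the second-moment accumulator, roughly halving
# optimizer state memory compared to Adam/AdamW.
optimizer = optax.adafactor(learning_rate=learning_rate_fn)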