{
  "tgt_vocab_size": 20000,
  "n_sample": 0,
  "src_vocab_size": 20000,
  "valid_metrics": [
    "BLEU"
  ],
  "tgt_vocab": "en.eole.vocab",
  "tensorboard_log_dir": "tensorboard",
  "tensorboard_log_dir_dated": "tensorboard/May-21_06-04-15",
  "seed": 1234,
  "transforms": [
    "sentencepiece",
    "filtertoolong"
  ],
  "overwrite": true,
  "tensorboard": true,
  "src_vocab": "bg.eole.vocab",
  "share_vocab": false,
  "vocab_size_multiple": 8,
  "save_data": "data",
  "report_every": 100,
  "training": {
    "gpu_ranks": [
      0
    ],
    "batch_size_multiple": 8,
    "world_size": 1,
    "accum_steps": [
      0
    ],
    "compute_dtype": "torch.float16",
    "num_workers": 0,
    "dropout": [
      0.1
    ],
    "learning_rate": 2.0,
    "save_checkpoint_steps": 5000,
    "max_grad_norm": 0.0,
    "valid_batch_size": 4096,
    "batch_size": 8000,
    "train_steps": 100000,
    "batch_type": "tokens",
    "average_decay": 0.0001,
    "prefetch_factor": 32,
    "accum_count": [
      10
    ],
    "model_path": "quickmt-bg-en-eole-model",
    "valid_steps": 5000,
    "optim": "adamw",
    "attention_dropout": [
      0.1
    ],
    "label_smoothing": 0.1,
    "warmup_steps": 4000,
    "dropout_steps": [
      0
    ],
    "normalization": "tokens",
    "bucket_size": 128000,
    "keep_checkpoint": 4,
    "decay_method": "noam",
    "adam_beta2": 0.998,
    "param_init_method": "xavier_uniform"
  },
  "transforms_configs": {
    "filtertoolong": {
      "tgt_seq_length": 256,
      "src_seq_length": 256
    },
    "sentencepiece": {
      "tgt_subword_model": "${MODEL_PATH}/en.spm.model",
      "src_subword_model": "${MODEL_PATH}/bg.spm.model"
    }
  },
  "data": {
    "corpus_1": {
      "path_align": null,
      "transforms": [
        "sentencepiece",
        "filtertoolong"
      ],
      "path_tgt": "train.en",
      "path_src": "train.bg"
    },
    "valid": {
      "path_align": null,
      "transforms": [
        "sentencepiece",
        "filtertoolong"
      ],
      "path_tgt": "dev.en",
      "path_src": "dev.bg"
    }
  },
  "model": {
    "transformer_ff": 4096,
    "architecture": "transformer",
    "heads": 8,
    "share_decoder_embeddings": false,
    "share_embeddings": false,
    "position_encoding_type": "SinusoidalInterleaved",
    "hidden_size": 1024,
    "decoder": {
      "decoder_type": "transformer",
      "layers": 2,
      "transformer_ff": 4096,
      "heads": 8,
      "tgt_word_vec_size": 1024,
      "n_positions": null,
      "position_encoding_type": "SinusoidalInterleaved",
      "hidden_size": 1024
    },
    "encoder": {
      "layers": 8,
      "transformer_ff": 4096,
      "heads": 8,
      "src_word_vec_size": 1024,
      "encoder_type": "transformer",
      "n_positions": null,
      "position_encoding_type": "SinusoidalInterleaved",
      "hidden_size": 1024
    },
    "embeddings": {
      "word_vec_size": 1024,
      "src_word_vec_size": 1024,
      "position_encoding_type": "SinusoidalInterleaved",
      "tgt_word_vec_size": 1024
    }
  }
}