|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os |
|
import shutil |
|
import tempfile |
|
|
|
import pytest |
|
import torch |
|
from omegaconf import DictConfig, OmegaConf |
|
|
|
from nemo.collections.nlp.models import MTEncDecModel |
|
from nemo.collections.nlp.models.machine_translation.mt_enc_dec_config import AAYNBaseConfig |
|
|
|
|
|
def export_test(model, suffix):
    """Export ``model`` into a scratch directory and check the produced files.

    The model is asked to export to ``nmt<suffix>`` with trace checking
    enabled; the helper then asserts that both ``encoder-nmt<suffix>`` and
    ``decoder-nmt<suffix>`` exist next to the requested output path.

    Args:
        model: Object exposing ``export(output=..., check_trace=...)``.
        suffix: File extension (including the dot) appended to the base name.

    Raises:
        AssertionError: If either expected file is missing after the export.
    """
    with tempfile.TemporaryDirectory() as scratch_dir:
        base_name = 'nmt' + suffix
        # Expected outputs, in the order they are verified.
        expected_outputs = [
            os.path.join(scratch_dir, prefix + base_name) for prefix in ('encoder-', 'decoder-')
        ]

        model.export(output=os.path.join(scratch_dir, base_name), check_trace=True)

        for produced in expected_outputs:
            assert os.path.exists(produced)
|
|
|
|
|
def get_cfg():
    """Build the ``AAYNBaseConfig`` used by the tests in this module.

    Both the encoder and decoder tokenizers are pointed at the same YTTM
    model file, and the train/validation/test dataset sections are set to
    ``None``.

    Returns:
        The configured ``AAYNBaseConfig`` instance.
    """
    config = AAYNBaseConfig()

    # Encoder first, then decoder, each receiving the same tokenizer settings.
    for tokenizer_cfg in (config.encoder_tokenizer, config.decoder_tokenizer):
        tokenizer_cfg.tokenizer_name = 'yttm'
        tokenizer_cfg.tokenizer_model = 'tests/.data/yttm.4096.en-de.model'

    config.train_ds = config.validation_ds = config.test_ds = None
    return config
|
|
|
|
|
class TestMTEncDecModel:
    """Tests for ``MTEncDecModel`` built from the configuration returned by ``get_cfg()``."""

    @pytest.mark.unit
    def test_creation_saving_restoring(self):
        """Save the model, copy the archive elsewhere, and restore from the copy."""
        model = MTEncDecModel(cfg=get_cfg())
        assert isinstance(model, MTEncDecModel)

        with tempfile.TemporaryDirectory() as restore_folder:
            with tempfile.TemporaryDirectory() as save_folder:
                save_folder_path = save_folder

                model_save_path = os.path.join(save_folder, f"{model.__class__.__name__}.nemo")
                model.save_to(save_path=model_save_path)

                model_restore_path = os.path.join(restore_folder, f"{model.__class__.__name__}.nemo")
                shutil.copy(model_save_path, model_restore_path)

            # The save folder is gone once its context exits; only the copied
            # archive in the restore folder may remain.
            assert save_folder_path is not None and not os.path.exists(save_folder_path)
            assert not os.path.exists(model_save_path)
            assert os.path.exists(model_restore_path)

            model_copy = model.__class__.restore_from(restore_path=model_restore_path)
            assert model.num_weights == model_copy.num_weights

    @pytest.mark.unit
    def test_no_artifact_name_collision(self):
        """Check that encoder/decoder tokenizer artifacts get distinct names when saved.

        ``get_cfg()`` points both tokenizers at the same model file; after
        saving, the stored config must reference two different ``nemo:``
        artifacts, and both must exist in the unpacked archive.
        """
        model = MTEncDecModel(cfg=get_cfg())
        assert isinstance(model, MTEncDecModel)

        artifact_prefix = "nemo:"
        with tempfile.TemporaryDirectory() as save_dir:
            # Write the archive inside the temporary directory so the test does
            # not leave ``nmt_model.nemo`` behind in the working directory.
            nemo_path = os.path.join(save_dir, "nmt_model.nemo")
            model.save_to(nemo_path)

            with tempfile.TemporaryDirectory() as tmpdir:
                model._save_restore_connector._unpack_nemo_file(path2file=nemo_path, out_folder=tmpdir)
                conf = OmegaConf.load(os.path.join(tmpdir, "model_config.yaml"))

                encoder_artifact = conf.encoder_tokenizer.tokenizer_model
                decoder_artifact = conf.decoder_tokenizer.tokenizer_model

                assert encoder_artifact != decoder_artifact

                assert encoder_artifact.startswith(artifact_prefix)
                assert decoder_artifact.startswith(artifact_prefix)

                # Strip the prefix to obtain the file name inside the archive.
                assert os.path.exists(os.path.join(tmpdir, encoder_artifact[len(artifact_prefix):]))
                assert os.path.exists(os.path.join(tmpdir, decoder_artifact[len(artifact_prefix):]))

    @pytest.mark.unit
    def test_train_eval_loss(self):
        """Compare training and evaluation losses with and without label smoothing.

        With ``label_smoothing = 0.5`` the two losses must differ; with
        ``label_smoothing = 0`` they must be (numerically) equal.
        """
        cfg = get_cfg()
        cfg.label_smoothing = 0.5
        model = MTEncDecModel(cfg=cfg)
        assert isinstance(model, MTEncDecModel)

        batch_size = 10
        time = 32
        # NOTE(review): size of the logits' last axis; the labels below are drawn
        # from the decoder tokenizer's vocabulary instead — presumably this only
        # needs to be at least as large as that vocabulary; confirm.
        vocab_size = 32000
        torch.manual_seed(42)
        tgt_ids = torch.LongTensor(batch_size, time).random_(1, model.decoder_tokenizer.vocab_size)
        logits = torch.FloatTensor(batch_size, time, vocab_size).random_(-1, 1)
        log_probs = torch.nn.functional.log_softmax(logits, dim=-1)

        train_loss = model.loss_fn(log_probs=log_probs, labels=tgt_ids)
        eval_loss = model.eval_loss_fn(log_probs=log_probs, labels=tgt_ids)
        assert not torch.allclose(train_loss, eval_loss)

        # Rebuild the model without smoothing and reuse the same inputs.
        cfg.label_smoothing = 0
        model = MTEncDecModel(cfg=cfg)

        train_loss = model.loss_fn(log_probs=log_probs, labels=tgt_ids)
        eval_loss = model.eval_loss_fn(log_probs=log_probs, labels=tgt_ids)
        assert torch.allclose(train_loss, eval_loss)

    @pytest.mark.skipif(not os.path.exists('/home/TestData/nlp'), reason='Not a Jenkins machine')
    @pytest.mark.run_only_on('GPU')
    @pytest.mark.unit
    def test_gpu_export_ts(self):
        """Export a CUDA model with the ``.ts`` suffix and check the output files."""
        model = MTEncDecModel(cfg=get_cfg()).cuda()
        assert isinstance(model, MTEncDecModel)
        export_test(model, ".ts")

    @pytest.mark.run_only_on('GPU')
    @pytest.mark.unit
    def test_gpu_export_onnx(self):
        """Export a CUDA model with the ``.onnx`` suffix and check the output files."""
        model = MTEncDecModel(cfg=get_cfg()).cuda()
        assert isinstance(model, MTEncDecModel)
        export_test(model, ".onnx")
|
|
|
|
|
if __name__ == "__main__":
    # Running the file directly executes only the loss comparison test,
    # without going through pytest.
    suite = TestMTEncDecModel()
    suite.test_train_eval_loss()
|
|