|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os |
|
|
|
import numpy as np |
|
import pytest |
|
from pytorch_lightning.trainer.trainer import Trainer |
|
|
|
from nemo.collections.nlp.models.language_modeling.megatron_gpt_model import MegatronGPTModel |
|
from nemo.collections.nlp.modules.common.transformer.text_generation import LengthParam, SamplingParam |
|
from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy |
|
|
|
|
|
class TestGPTEval:
    """Integration tests for MegatronGPT text generation (greedy decoding with
    log-probs, and top-p sampling) against golden token ids / log-probs produced
    by the 125M reference checkpoint."""

    @pytest.mark.run_only_on('GPU')
    def setup_method(self, test_method):
        """Restore the 125M GPT model from its .nemo checkpoint before each test.

        Builds a single-device Lightning trainer, sanity-checks that the world
        size matches the requested model-parallel configuration, and freezes the
        restored model so inference runs without gradient tracking.
        """
        trainer_config = {
            "devices": 1,
            "num_nodes": 1,
            "accelerator": "gpu",
            "logger": False,
            "precision": 16,
        }
        tensor_model_parallel_size = 1
        pipeline_model_parallel_size = 1
        # Allow the checkpoint location to be overridden (e.g. in CI) while
        # keeping the historical default path as a backward-compatible fallback.
        model_file = os.environ.get(
            'NEMO_GPT_MODEL_FILE', '/home/TestData/nlp/megatron_gpt/125M/megatron_gpt.nemo'
        )

        trainer = Trainer(strategy=NLPDDPStrategy(), **trainer_config)
        assert (
            trainer_config["devices"] * trainer_config['num_nodes']
            == tensor_model_parallel_size * pipeline_model_parallel_size
        ), "devices * num_nodes should equal tensor_model_parallel_size * pipeline_model_parallel_size"

        model = MegatronGPTModel.restore_from(restore_path=model_file, trainer=trainer)
        model.freeze()

        # Activation checkpointing is a training-time memory optimization; turn
        # it off for pure inference. Older checkpoints may not expose this
        # attribute, hence the guard.
        try:
            model.model.language_model.encoder.activations_checkpoint_method = None
        except AttributeError:
            pass

        self.model = model

    # NOTE(review): the original stacked both @pytest.mark.skipif(True, ...) and
    # a bare @pytest.mark.skip() — a single skip marker with a reason suffices
    # and keeps the skip reason visible in the pytest report.
    @pytest.mark.skip(reason='skip')
    @pytest.mark.run_only_on('GPU')
    @pytest.mark.unit
    def test_gpt_eval(self):
        """Exercise generate() in two modes and compare against golden outputs.

        1. Greedy decoding with ``compute_logprob=True`` on a real prompt:
           checks echoed sentence, token ids, log-prob values and offsets.
        2. Top-p sampling (``top_p=0.8``, ``repetition_penalty=1.2``) from an
           empty prompt: checks the exact token ids and decoded text.
        """
        length_params: LengthParam = {
            "max_length": 30,
            "min_length": 0,
        }

        sampling_params: SamplingParam = {
            "use_greedy": True,
            "temperature": 1.0,
            "top_k": 0,
            "top_p": 1.0,
            "repetition_penalty": 1.0,
            "add_BOS": True,
            "all_probs": False,
            "compute_logprob": False,
        }

        # --- Greedy path with log-prob computation --------------------------
        sampling_params["compute_logprob"] = True
        sentence = 'run gpt in inference mode'
        response = self.model.generate(inputs=[sentence], length_params=length_params, sampling_params=sampling_params)
        # Golden check: with compute_logprob enabled the input sentence is
        # returned unchanged (scoring mode, no continuation appended).
        assert response["sentences"][0] == sentence
        gt_token_ids = [5143, 308, 457, 287, 32278, 4235]
        assert np.array_equal(np.array(response['token_ids'][0]), gt_token_ids)
        # One full-vocab log-prob row per predicted position (N tokens -> N-1).
        assert len(response['full_logprob'][0]) == 5
        gt_log_prob = [
            -7.9579081535339355,
            -7.195970058441162,
            -5.269130706787109,
            -12.75404167175293,
            -4.631799697875977,
        ]
        # Loose float tolerance: values come from fp16 GPU inference.
        assert np.allclose(np.array(response['logprob'][0]), gt_log_prob, atol=1e-4)
        # Character offsets of each token within the input sentence.
        gt_offsets = [0, 3, 5, 7, 10, 20]
        assert np.array_equal(np.array(response['offsets'][0]), gt_offsets)

        # --- Sampling path (top-p + repetition penalty) ---------------------
        sampling_params["compute_logprob"] = False
        sampling_params["use_greedy"] = False
        sampling_params["top_p"] = 0.8
        sampling_params["repetition_penalty"] = 1.2

        gt_token_ids = [
            50256,
            15,
            59,
            198,
            59,
            2,
            16,
            59,
            2,
            17,
            58,
            57,
            59,
            62,
            37,
            7,
            39,
            15437,
            90,
            92,
            357,
            2481,
            8,
            3467,
            2,
            18,
            30109,
            9,
            43215,
            13,
            5416,
        ]
        gt_text = '0\\\n\\#1\\#2[Z\\_F(H)]{} (21) \\#3[[*Phys. Rev'
        # Sampling is deterministic here (fixed seed inside generate — TODO
        # confirm), so exact token ids and text can be pinned.
        response = self.model.generate(inputs=[''], length_params=length_params, sampling_params=sampling_params)
        assert np.array_equal(np.array(response['token_ids'][0]), gt_token_ids)
        assert response['sentences'][0] == gt_text
|
|