# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os

import numpy as np
import pytest
from pytorch_lightning.trainer.trainer import Trainer

from nemo.collections.nlp.models.language_modeling.megatron_gpt_model import MegatronGPTModel
from nemo.collections.nlp.modules.common.transformer.text_generation import LengthParam, SamplingParam
from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy


class TestGPTEval:
    @pytest.mark.run_only_on('GPU')
    def setup_method(self, test_method):
        trainer_config = {
            "devices": 1,
            "num_nodes": 1,
            "accelerator": "gpu",
            "logger": False,
            "precision": 16,
        }
        tensor_model_parallel_size = 1
        pipeline_model_parallel_size = 1
        model_file = '/home/TestData/nlp/megatron_gpt/125M/megatron_gpt.nemo'

        # trainer required for restoring model parallel models
        trainer = Trainer(strategy=NLPDDPStrategy(), **trainer_config)
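        # every model-parallel rank has to map onto exactly one process, so the world
        # size (devices * num_nodes) must equal the product of the parallel sizes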
        assert (
            trainer_config["devices"] * trainer_config['num_nodes']
            == tensor_model_parallel_size * pipeline_model_parallel_size
        ), "devices * num_nodes should equal tensor_model_parallel_size * pipeline_model_parallel_size"
        model = MegatronGPTModel.restore_from(restore_path=model_file, trainer=trainer)
        model.freeze()

        # activation checkpointing only matters for the backward pass during training,
        # so it has to be turned off for inference
        try:
            model.model.language_model.encoder.activations_checkpoint_method = None
        except AttributeError:
            pass
        self.model = model

    # @pytest.mark.skipif(not os.path.exists('/home/TestData/nlp'), reason='Not a Jenkins machine')
    # skip this unit test for now; need to investigate the numerical issue
    @pytest.mark.skipif(True, reason='skip')
    @pytest.mark.run_only_on('GPU')
    @pytest.mark.unit
    @pytest.mark.skip()
    # TODO: re-enable the test
    def test_gpt_eval(self):
        # test greedy
        length_params: LengthParam = {
            "max_length": 30,
            "min_length": 0,
        }
        sampling_params: SamplingParam = {
            "use_greedy": True,
            "temperature": 1.0,
            "top_k": 0,
            "top_p": 1.0,
            "repetition_penalty": 1.0,
            "add_BOS": True,
            "all_probs": False,
            "compute_logprob": False,
        }
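
        # with use_greedy=True the decoder takes the argmax token at every step, so
        # temperature, top_k, and top_p are effectively inert until greedy decoding
        # is switched off for the top_p case further down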

        # test logprob
        sampling_params["compute_logprob"] = True
        sentence = 'run gpt in inference mode'
        response = self.model.generate(inputs=[sentence], length_params=length_params, sampling_params=sampling_params)
        assert response["sentences"][0] == sentence
        gt_token_ids = [5143, 308, 457, 287, 32278, 4235]
        assert np.array_equal(np.array(response['token_ids'][0]), gt_token_ids)
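        # with compute_logprob=True the model scores the prompt instead of generating
        # new text: six input tokens yield five next-token predictions, hence five
        # entries in the logprob arrays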
        assert len(response['full_logprob'][0]) == 5
        gt_log_prob = [
            -7.9579081535339355,
            -7.195970058441162,
            -5.269130706787109,
            -12.75404167175293,
            -4.631799697875977,
        ]
        assert np.allclose(np.array(response['logprob'][0]), gt_log_prob, atol=1e-4)
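        # offsets hold the character position where each token starts in the returned
        # sentence: 'run' -> 0, ' g' -> 3, 'pt' -> 5, ' in' -> 7, ' inference' -> 10, ' mode' -> 20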
        gt_offsets = [0, 3, 5, 7, 10, 20]
        assert np.array_equal(np.array(response['offsets'][0]), gt_offsets)

        # test top_p
        sampling_params["compute_logprob"] = False
        sampling_params["use_greedy"] = False
        sampling_params["top_p"] = 0.8
        sampling_params["repetition_penalty"] = 1.2
        gt_token_ids = [
            50256,
            15,
            59,
            198,
            59,
            2,
            16,
            59,
            2,
            17,
            58,
            57,
            59,
            62,
            37,
            7,
            39,
            15437,
            90,
            92,
            357,
            2481,
            8,
            3467,
            2,
            18,
            30109,
            9,
            43215,
            13,
            5416,
        ]
        gt_text = '0\\\n\\#1\\#2[Z\\_F(H)]{} (21) \\#3[[*Phys. Rev'
        response = self.model.generate(inputs=[''], length_params=length_params, sampling_params=sampling_params)
        assert np.array_equal(np.array(response['token_ids'][0]), gt_token_ids)
        assert response['sentences'][0] == gt_text
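
# to run this test manually once it is re-enabled (assumes a GPU machine with the
# 125M checkpoint available at the path used in setup_method):
#   pytest -m unit tests/collections/nlp/test_gpt_eval.py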