# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import os import numpy as np import pytest from pytorch_lightning.trainer.trainer import Trainer from nemo.collections.nlp.models.language_modeling.megatron_gpt_model import MegatronGPTModel from nemo.collections.nlp.modules.common.transformer.text_generation import LengthParam, SamplingParam from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy class TestGPTEval: @pytest.mark.run_only_on('GPU') def setup_method(self, test_method): trainer_config = { "devices": 1, "num_nodes": 1, "accelerator": "gpu", "logger": False, "precision": 16, } tensor_model_parallel_size = 1 pipeline_model_parallel_size = 1 model_file = '/home/TestData/nlp/megatron_gpt/125M/megatron_gpt.nemo' # trainer required for restoring model parallel models trainer = Trainer(strategy=NLPDDPStrategy(), **trainer_config) assert ( trainer_config["devices"] * trainer_config['num_nodes'] == tensor_model_parallel_size * pipeline_model_parallel_size ), "devices * num_nodes should equal tensor_model_parallel_size * pipeline_model_parallel_size" model = MegatronGPTModel.restore_from(restore_path=model_file, trainer=trainer) model.freeze() # has to turn off activations_checkpoint_method for inference try: model.model.language_model.encoder.activations_checkpoint_method = None except AttributeError: pass self.model = model # @pytest.mark.skipif(not os.path.exists('/home/TestData/nlp'), reason='Not a Jenkins machine') # skip this unit test for now. need to investigate the numerical issue @pytest.mark.skipif(True, reason='skip') @pytest.mark.run_only_on('GPU') @pytest.mark.unit @pytest.mark.skip() # TODO renable the test def test_gpt_eval(self): # test greedy length_params: LengthParam = { "max_length": 30, "min_length": 0, } sampling_params: SamplingParam = { "use_greedy": True, "temperature": 1.0, "top_k": 0, "top_p": 1.0, "repetition_penalty": 1.0, "add_BOS": True, "all_probs": False, "compute_logprob": False, } # test logprob sampling_params["compute_logprob"] = True sentence = 'run gpt in inference mode' response = self.model.generate(inputs=[sentence], length_params=length_params, sampling_params=sampling_params) assert response["sentences"][0] == sentence gt_token_ids = [5143, 308, 457, 287, 32278, 4235] assert np.array_equal(np.array(response['token_ids'][0]), gt_token_ids) assert len(response['full_logprob'][0]) == 5 gt_log_prob = [ -7.9579081535339355, -7.195970058441162, -5.269130706787109, -12.75404167175293, -4.631799697875977, ] assert np.allclose(np.array(response['logprob'][0]), gt_log_prob, atol=1e-4) gt_offsets = [0, 3, 5, 7, 10, 20] assert np.array_equal(np.array(response['offsets'][0]), gt_offsets) # # test top_p sampling_params["compute_logprob"] = False sampling_params["use_greedy"] = False sampling_params["top_p"] = 0.8 sampling_params["repetition_penalty"] = 1.2 gt_token_ids = [ 50256, 15, 59, 198, 59, 2, 16, 59, 2, 17, 58, 57, 59, 62, 37, 7, 39, 15437, 90, 92, 357, 2481, 8, 3467, 2, 18, 30109, 9, 43215, 13, 5416, ] gt_text = '0\\\n\\#1\\#2[Z\\_F(H)]{} (21) \\#3[[*Phys. Rev' response = self.model.generate(inputs=[''], length_params=length_params, sampling_params=sampling_params) assert np.array_equal(np.array(response['token_ids'][0]), gt_token_ids) assert response['sentences'][0] == gt_text