import os

import pytest
import torch
from omegaconf import DictConfig
from pytorch_lightning import Trainer

from nemo.collections.common.tokenizers.huggingface.auto_tokenizer import AutoTokenizer
from nemo.collections.nlp.models.language_modeling.megatron_gpt_model import MegatronGPTModel
from nemo.collections.nlp.modules.common.megatron.utils import get_ltor_masks_and_position_ids
from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy

# Used below to skip the bf16 parameterization on GPUs without bfloat16 support
# (compute capability < 8.0).
DEVICE_CAPABILITY = None
if torch.cuda.is_available():
    DEVICE_CAPABILITY = torch.cuda.get_device_capability()


@pytest.fixture()
def model_cfg(test_data_dir):
    # Minimal single-layer GPT config so the model builds quickly in unit tests.
    model_cfg = {
        'precision': 16,
        'micro_batch_size': 4,
        'global_batch_size': 8,
        'tensor_model_parallel_size': 1,
        'pipeline_model_parallel_size': 1,
        'resume_from_checkpoint': None,
        'encoder_seq_length': 512,
        'max_position_embeddings': 512,
        'num_layers': 1,
        'hidden_size': 128,
        'ffn_hidden_size': 512,
        'num_attention_heads': 2,
        'init_method_std': 0.02,
        'hidden_dropout': 0.1,
        'kv_channels': None,
        'apply_query_key_layer_scaling': True,
        'layernorm_epsilon': 1e-5,
        'make_vocab_size_divisible_by': 128,
        'pre_process': True,
        'post_process': True,
        'persist_layer_norm': True,
        'gradient_as_bucket_view': True,
        'tokenizer': {
            'library': 'megatron',
            'type': 'GPT2BPETokenizer',
            'model': None,
            'vocab_file': os.path.join(test_data_dir, 'nlp/gpt_vocab_merges/vocab.json'),
            'merge_file': os.path.join(test_data_dir, 'nlp/gpt_vocab_merges/merges.txt'),
            'delimiter': None,
        },
        'native_amp_init_scale': 4294967296,
        'native_amp_growth_interval': 1000,
        'hysteresis': 2,
        'fp32_residual_connection': False,
        'fp16_lm_cross_entropy': False,
        'megatron_amp_O2': False,
        'seed': 1234,
        'use_cpu_initialization': False,
        'onnx_safe': False,
        'apex_transformer_log_level': 30,
        'activations_checkpoint_method': None,
        'activations_checkpoint_num_layers': 1,
        'data': {
            'data_prefix': '???',
            'index_mapping_dir': None,
            'data_impl': 'mmap',
            'splits_string': '900,50,50',
            'seq_length': 512,
            'skip_warmup': True,
            'num_workers': 2,
            'dataloader_type': 'single',
            'reset_position_ids': False,
            'reset_attention_mask': False,
            'eod_mask_loss': False,
        },
        'optim': {
            'name': 'fused_adam',
            'lr': 2e-4,
            'weight_decay': 0.01,
            'betas': [0.9, 0.98],
            'sched': {'name': 'CosineAnnealing', 'warmup_steps': 500, 'constant_steps': 50000, 'min_lr': 2e-5},
        },
    }
    return model_cfg


@pytest.fixture()
def trainer_cfg():
    trainer_cfg = {
        'devices': 1,
        'num_nodes': 1,
        'accelerator': 'gpu',
        'precision': 16,
        'logger': False,
        'enable_checkpointing': False,
        'replace_sampler_ddp': False,
        'max_epochs': 1000,
        'max_steps': 100000,
        'log_every_n_steps': 10,
        'val_check_interval': 100,
        'limit_val_batches': 50,
        'limit_test_batches': 500,
        'accumulate_grad_batches': 1,
        'gradient_clip_val': 1.0,
    }
    return trainer_cfg


@pytest.fixture()
def precision():
    # Default precision; test_forward overrides this via @pytest.mark.parametrize.
    return 32


@pytest.fixture()
def gpt_model(model_cfg, trainer_cfg, precision):
    model_cfg['precision'] = precision
    trainer_cfg['precision'] = precision

    strategy = NLPDDPStrategy()
    trainer = Trainer(strategy=strategy, **trainer_cfg)
    cfg = DictConfig(model_cfg)
    model = MegatronGPTModel(cfg=cfg, trainer=trainer)

    return model


@pytest.fixture()
def test_text():
    # Note: the misspelling in the last sentence is intentional; the expected token ids
    # in test_tokenizer depend on the exact text.
    test_text = [
        "hello, world",
        "four score and seven years ago",
        "Your time is limited",
        "If you set goals rediculously high",
    ]
    return test_text


@pytest.mark.run_only_on('GPU')
class TestGPTModel:
    @pytest.mark.unit
    def test_constructor(self, gpt_model):
        assert isinstance(gpt_model, MegatronGPTModel)

        num_weights = gpt_model.num_weights
        assert num_weights == 6702976

    @pytest.mark.unit
    def test_tokenizer(self, gpt_model, test_text):
        assert isinstance(gpt_model.tokenizer, AutoTokenizer)
        assert gpt_model.tokenizer.name == 'GPT2Tokenizer'
        assert gpt_model.tokenizer.vocab_size == 50257

        ids = [gpt_model.tokenizer.text_to_ids(text) for text in test_text]

        true_ids = [
            [31373, 11, 995],
            [14337, 4776, 290, 3598, 812, 2084],
            [7120, 640, 318, 3614],
            [1532, 345, 900, 4661, 2266, 291, 18117, 1029],
        ]
        assert sum([id_list == true_id_list for id_list, true_id_list in zip(ids, true_ids)]) == 4

    @pytest.mark.parametrize(
        "precision",
        [
            32,
            16,
            pytest.param(
                "bf16",
                marks=pytest.mark.skipif(
                    not DEVICE_CAPABILITY or DEVICE_CAPABILITY[0] < 8,
                    reason='bfloat16 is not supported on this device',
                ),
            ),
        ],
    )
    @pytest.mark.unit
    def test_forward(self, gpt_model, test_text):
        dtype = None
        if gpt_model.cfg['precision'] == 32:
            dtype = torch.float
        elif gpt_model.cfg['precision'] == 16:
            dtype = torch.float16
        elif gpt_model.cfg['precision'] == 'bf16':
            dtype = torch.bfloat16
        else:
            raise ValueError(f"precision: {gpt_model.cfg['precision']} is not supported.")

        gpt_model.eval()

        ids = [gpt_model.tokenizer.text_to_ids(text) for text in test_text]
        id_tensors = [torch.unsqueeze(torch.LongTensor(id_list), dim=0) for id_list in ids]

        # get_ltor_masks_and_position_ids returns (attention_mask, loss_mask, position_ids).
        masks_and_position_ids = [
            get_ltor_masks_and_position_ids(id_tensor, gpt_model.tokenizer.eos_id, False, False, False)
            for id_tensor in id_tensors
        ]

        output_tensors = []
        with torch.no_grad():
            for tokens, attn_mask_and_pos_ids in zip(id_tensors, masks_and_position_ids):
                attn_mask, _, pos_ids = attn_mask_and_pos_ids
                assert tokens.shape == pos_ids.shape
                assert attn_mask.shape[2] == attn_mask.shape[3] == tokens.shape[1] == pos_ids.shape[1]
                with torch.autocast('cuda', dtype=dtype):
                    output_tensor = gpt_model.forward(
                        tokens=tokens.cuda(),
                        text_position_ids=pos_ids.cuda(),
                        attention_mask=attn_mask.cuda(),
                        labels=None,
                    )

                # Logits come back as [batch, seq_len, padded_vocab_size] in the autocast dtype.
                assert output_tensor.shape[0] == 1
                assert output_tensor.shape[1] == tokens.shape[1]
                assert output_tensor.shape[2] == gpt_model.padded_vocab_size
                assert output_tensor.dtype == dtype
                output_tensors.append(output_tensor)
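

# Example invocation (a sketch under assumptions: the repository's conftest.py is presumed to
# provide the `test_data_dir` fixture and to handle the custom `run_only_on('GPU')` marker, and
# a CUDA-capable GPU must be available; the bf16 case additionally needs compute capability >= 8.0):
#
#   pytest -m unit <path-to-this-test-file>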