; Settings for the "belle" model section.
; NOTE(review): trailing " | |" residue was removed from every line; with most
; INI parsers it would have become part of each value (e.g. "30 | |").
[belle]
; NOTE(review): model_name is empty - confirm whether the consumer needs a value.
model_name=
; Network dimensions.
num_layer=30
head_num=32
inter_size=16384
size_per_head=128
vocab_size=250880
; presumably the tensor-parallel degree the weights were split for - TODO confirm
tensor_para_size=1
weight_data_type=fp16
; Architecture switches.
model_variant=bloom-pre
layernorm_eps=1e-05
layernorm_type=pre_layernorm
activation_type=Gelu
has_positional_encoding=False
has_pre_decoder_layernorm=True
has_post_decoder_layernorm=True
use_attention_linear_bias=True
; presumably the tokenizer's start/end token ids - verify against the tokenizer
start_id=1
end_id=2