; File size: 378 Bytes
; 3f70f85
; Model hyperparameters for the "belle" section. All values are plain strings;
; any int/float/bool typing is applied by whatever loader reads this file.
; NOTE(review): the key set looks like a FasterTransformer-style BLOOM config;
; confirm against the consuming loader before renaming or reordering keys.
[belle]
; Present but empty in this file.
; NOTE(review): confirm the loader tolerates an empty model name.
model_name=
; Network geometry. head_num * size_per_head = 32 * 128 = 4096, and
; inter_size = 16384 = 4 * 4096 (presumably hidden size and feed-forward
; width respectively; verify against the checkpoint).
num_layer=30
head_num=32
inter_size=16384
size_per_head=128
vocab_size=250880
; Presumably 1 means the weights are not sharded across GPUs. TODO confirm.
tensor_para_size=1
; NOTE(review): presumably must match the precision of the converted weight
; files; verify.
weight_data_type=fp16
model_variant=bloom-pre
; Normalisation and activation settings.
layernorm_eps=1e-05
layernorm_type=pre_layernorm
activation_type=Gelu
; Boolean flags are spelled True/False (capitalised) throughout this file.
; Keep that spelling unless the consuming parser is known to be
; case-insensitive.
has_positional_encoding=False
has_pre_decoder_layernorm=True
has_post_decoder_layernorm=True
use_attention_linear_bias=True
; Presumably the tokenizer ids that mark sequence start and end; verify
; against the tokenizer shipped with the model.
start_id=1
end_id=2
|