norm_cfg = dict(type='SyncBN', requires_grad=True) data_preprocessor = dict( type='SegDataPreProcessor', mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], bgr_to_rgb=True, pad_val=0, seg_pad_val=255) model = dict( type='EncoderDecoder', data_preprocessor=data_preprocessor, pretrained=None, backbone=dict( type='BEiT', img_size=(640, 640), patch_size=16, in_channels=3, embed_dims=768, num_layers=12, num_heads=12, mlp_ratio=4, out_indices=(3, 5, 7, 11), qv_bias=True, attn_drop_rate=0.0, drop_path_rate=0.1, norm_cfg=dict(type='LN', eps=1e-6), act_cfg=dict(type='GELU'), norm_eval=False, init_values=0.1), neck=dict(type='Feature2Pyramid', embed_dim=768, rescales=[4, 2, 1, 0.5]), decode_head=dict( type='UPerHead', in_channels=[768, 768, 768, 768], in_index=[0, 1, 2, 3], pool_scales=(1, 2, 3, 6), channels=768, dropout_ratio=0.1, num_classes=150, norm_cfg=norm_cfg, align_corners=False, loss_decode=dict( type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), auxiliary_head=dict( type='FCNHead', in_channels=768, in_index=2, channels=256, num_convs=1, concat_input=False, dropout_ratio=0.1, num_classes=150, norm_cfg=norm_cfg, align_corners=False, loss_decode=dict( type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), # model training and testing settings train_cfg=dict(), test_cfg=dict(mode='whole'))