# model settings backbone_norm_cfg = dict(type='LN', eps=1e-6, requires_grad=True) norm_cfg = dict(type='SyncBN', requires_grad=True) data_preprocessor = dict( type='SegDataPreProcessor', mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], bgr_to_rgb=True, pad_val=0, seg_pad_val=255) model = dict( type='EncoderDecoder', data_preprocessor=data_preprocessor, pretrained='pretrain/jx_vit_large_p16_384-b3be5167.pth', backbone=dict( type='VisionTransformer', img_size=(768, 768), patch_size=16, in_channels=3, embed_dims=1024, num_layers=24, num_heads=16, out_indices=(5, 11, 17, 23), drop_rate=0.1, norm_cfg=backbone_norm_cfg, with_cls_token=False, interpolate_mode='bilinear', ), neck=dict( type='MLANeck', in_channels=[1024, 1024, 1024, 1024], out_channels=256, norm_cfg=norm_cfg, act_cfg=dict(type='ReLU'), ), decode_head=dict( type='SETRMLAHead', in_channels=(256, 256, 256, 256), channels=512, in_index=(0, 1, 2, 3), dropout_ratio=0, mla_channels=128, num_classes=19, norm_cfg=norm_cfg, align_corners=False, loss_decode=dict( type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), auxiliary_head=[ dict( type='FCNHead', in_channels=256, channels=256, in_index=0, dropout_ratio=0, num_convs=0, kernel_size=1, concat_input=False, num_classes=19, align_corners=False, loss_decode=dict( type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), dict( type='FCNHead', in_channels=256, channels=256, in_index=1, dropout_ratio=0, num_convs=0, kernel_size=1, concat_input=False, num_classes=19, align_corners=False, loss_decode=dict( type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), dict( type='FCNHead', in_channels=256, channels=256, in_index=2, dropout_ratio=0, num_convs=0, kernel_size=1, concat_input=False, num_classes=19, align_corners=False, loss_decode=dict( type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), dict( type='FCNHead', in_channels=256, channels=256, in_index=3, dropout_ratio=0, num_convs=0, kernel_size=1, concat_input=False, num_classes=19, align_corners=False, loss_decode=dict( type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), ], train_cfg=dict(), test_cfg=dict(mode='whole'))