_base_ = [
    '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py',
    '../_base_/schedules/schedule_80k.py'
]
crop_size = (512, 512)
data_preprocessor = dict(
    type='SegDataPreProcessor',
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    bgr_to_rgb=True,
    pad_val=0,
    size=crop_size,
    seg_pad_val=255)

# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
num_stages = 3
conv_kernel_size = 1

model = dict(
    type='EncoderDecoder',
    data_preprocessor=data_preprocessor,
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    decode_head=dict(
        type='IterativeDecodeHead',
        num_stages=num_stages,
        kernel_update_head=[
            dict(
                type='KernelUpdateHead',
                num_classes=150,
                num_ffn_fcs=2,
                num_heads=8,
                num_mask_fcs=1,
                feedforward_channels=2048,
                in_channels=512,
                out_channels=512,
                dropout=0.0,
                conv_kernel_size=conv_kernel_size,
                ffn_act_cfg=dict(type='ReLU', inplace=True),
                with_ffn=True,
                feat_transform_cfg=dict(
                    conv_cfg=dict(type='Conv2d'), act_cfg=None),
                kernel_updator_cfg=dict(
                    type='KernelUpdator',
                    in_channels=256,
                    feat_channels=256,
                    out_channels=256,
                    act_cfg=dict(type='ReLU', inplace=True),
                    norm_cfg=dict(type='LN'))) for _ in range(num_stages)
        ],
        kernel_generate_head=dict(
            type='ASPPHead',
            in_channels=2048,
            in_index=3,
            channels=512,
            dilations=(1, 12, 24, 36),
            dropout_ratio=0.1,
            num_classes=150,
            norm_cfg=norm_cfg,
            align_corners=False,
            loss_decode=dict(
                type='CrossEntropyLoss', use_sigmoid=False,
                loss_weight=1.0))),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=1024,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=150,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))

# optimizer
optim_wrapper = dict(
    _delete_=True,
    type='OptimWrapper',
    optimizer=dict(type='AdamW', lr=0.0001, weight_decay=0.0005),
    clip_grad=dict(max_norm=1, norm_type=2))

# learning policy
param_scheduler = [
    dict(
        type='LinearLR', start_factor=0.001, by_epoch=False, begin=0,
        end=1000),
    dict(
        type='MultiStepLR',
        begin=1000,
        end=80000,
        milestones=[60000, 72000],
        by_epoch=False)
]

# In K-Net implementation we use batch size 2 per GPU as default
train_dataloader = dict(batch_size=2, num_workers=2)
val_dataloader = dict(batch_size=1, num_workers=4)
test_dataloader = val_dataloader
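
# Example launch commands (a minimal sketch, assuming the standard
# MMSegmentation tools/ layout; the config path below is hypothetical and
# should be replaced with wherever this file is saved):
#
#   Single GPU:
#     python tools/train.py configs/knet/knet_s3_r50_aspp_ade20k.py
#
#   8 GPUs, matching the 2-images-per-GPU setting above for a global batch
#   size of 16:
#     bash tools/dist_train.sh configs/knet/knet_s3_r50_aspp_ade20k.py 8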