_base_ = 'knet-s3_swin-t_upernet_8xb2-adamw-80k_ade20k-512x512.py'

checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_large_patch4_window7_224_22k_20220308-d5bdebaf.pth'  # noqa
# model settings
crop_size = (640, 640)
data_preprocessor = dict(
    type='SegDataPreProcessor',
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    bgr_to_rgb=True,
    pad_val=0,
    size=crop_size,
    seg_pad_val=255)
model = dict(
    data_preprocessor=data_preprocessor,
    pretrained=checkpoint_file,
    backbone=dict(
        embed_dims=192,
        depths=[2, 2, 18, 2],
        num_heads=[6, 12, 24, 48],
        window_size=7,
        use_abs_pos_embed=False,
        drop_path_rate=0.4,
        patch_norm=True),
    decode_head=dict(
        kernel_generate_head=dict(in_channels=[192, 384, 768, 1536])),
    auxiliary_head=dict(in_channels=768))

train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', reduce_zero_label=True),
    dict(
        type='RandomResize',
        scale=(2048, 640),
        ratio_range=(0.5, 2.0),
        keep_ratio=True),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='PackSegInputs')
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='Resize', scale=(2048, 640), keep_ratio=True),
    # add loading annotation after ``Resize`` because ground truth
    # does not need to do resize data transform
    dict(type='LoadAnnotations', reduce_zero_label=True),
    dict(type='PackSegInputs')
]
# In the K-Net implementation we use batch size 2 per GPU by default.
# Each dataloader is configured in a single assignment so that the
# pipeline override is not discarded by a later reassignment.
train_dataloader = dict(
    batch_size=2, num_workers=2, dataset=dict(pipeline=train_pipeline))
val_dataloader = dict(
    batch_size=1, num_workers=4, dataset=dict(pipeline=test_pipeline))
test_dataloader = val_dataloader