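# MMSegmentation config: MaskFormer with a ResNet-50 backbone (output stride 32),
# trained on ADE20K with 512x512 crops for 160k iterations. Components whose
# type is prefixed with 'mmdet.' are built from the MMDetection registry, so
# mmdet must be installed alongside mmsegmentation.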
_base_ = [
    '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py',
    '../_base_/schedules/schedule_160k.py'
]
norm_cfg = dict(type='SyncBN', requires_grad=True)
crop_size = (512, 512)
data_preprocessor = dict(
    type='SegDataPreProcessor',
    size=crop_size,
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    bgr_to_rgb=True,
    pad_val=0,
    seg_pad_val=255)
# model_cfg
num_classes = 150
model = dict(
    type='EncoderDecoder',
    data_preprocessor=data_preprocessor,
    backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 1, 1),
        strides=(1, 2, 2, 2),
        norm_cfg=norm_cfg,
        norm_eval=True,
        style='pytorch',
        contract_dilation=True,
        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
    decode_head=dict(
        type='MaskFormerHead',
        in_channels=[256, 512, 1024,
                     2048],  # input channels of pixel_decoder modules
        feat_channels=256,
        in_index=[0, 1, 2, 3],
        num_classes=150,
        out_channels=256,
        num_queries=100,
        pixel_decoder=dict(
            type='mmdet.PixelDecoder',
            norm_cfg=dict(type='GN', num_groups=32),
            act_cfg=dict(type='ReLU')),
        enforce_decoder_input_project=False,
        positional_encoding=dict(  # SinePositionalEncoding
            num_feats=128, normalize=True),
        transformer_decoder=dict(  # DetrTransformerDecoder
            return_intermediate=True,
            num_layers=6,
            layer_cfg=dict(  # DetrTransformerDecoderLayer
                self_attn_cfg=dict(  # MultiheadAttention
                    embed_dims=256,
                    num_heads=8,
                    attn_drop=0.1,
                    proj_drop=0.1,
                    dropout_layer=None,
                    batch_first=True),
                cross_attn_cfg=dict(  # MultiheadAttention
                    embed_dims=256,
                    num_heads=8,
                    attn_drop=0.1,
                    proj_drop=0.1,
                    dropout_layer=None,
                    batch_first=True),
                ffn_cfg=dict(
                    embed_dims=256,
                    feedforward_channels=2048,
                    num_fcs=2,
                    act_cfg=dict(type='ReLU', inplace=True),
                    ffn_drop=0.1,
                    dropout_layer=None,
                    add_identity=True)),
            init_cfg=None),
        loss_cls=dict(
            type='mmdet.CrossEntropyLoss',
            use_sigmoid=False,
            loss_weight=1.0,
            reduction='mean',
            class_weight=[1.0] * num_classes + [0.1]),
        loss_mask=dict(
            type='mmdet.FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            reduction='mean',
            loss_weight=20.0),
        loss_dice=dict(
            type='mmdet.DiceLoss',
            use_sigmoid=True,
            activate=True,
            reduction='mean',
            naive_dice=True,
            eps=1.0,
            loss_weight=1.0),
        train_cfg=dict(
            assigner=dict(
                type='mmdet.HungarianAssigner',
                match_costs=[
                    dict(type='mmdet.ClassificationCost', weight=1.0),
                    dict(
                        type='mmdet.FocalLossCost',
                        weight=20.0,
                        binary_input=True),
                    dict(
                        type='mmdet.DiceCost',
                        weight=1.0,
                        pred_act=True,
                        eps=1.0)
                ]),
            sampler=dict(type='mmdet.MaskPseudoSampler'))),
    # training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'),
)
# optimizer
optimizer = dict(
    type='AdamW', lr=0.0001, betas=(0.9, 0.999), weight_decay=0.0001)
optim_wrapper = dict(
    _delete_=True,
    type='OptimWrapper',
    optimizer=optimizer,
    clip_grad=dict(max_norm=0.01, norm_type=2),
    paramwise_cfg=dict(custom_keys={
        'backbone': dict(lr_mult=0.1),
    }))
# learning policy
param_scheduler = [
    dict(
        type='PolyLR',
        eta_min=0,
        power=0.9,
        begin=0,
        end=160000,
        by_epoch=False)
]
# The MaskFormer implementation uses a batch size of 2 per GPU by default
train_dataloader = dict(batch_size=2, num_workers=2)
val_dataloader = dict(batch_size=1, num_workers=4)
test_dataloader = val_dataloader
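# Usage (a minimal sketch, not part of the original config): assuming this file
# is saved inside an MMSegmentation checkout as, e.g.,
# configs/maskformer/maskformer_r50-d32_8xb2-160k_ade20k-512x512.py (the path is
# an assumption), training can be launched with the standard entry points:
#
#   # single GPU
#   python tools/train.py configs/maskformer/maskformer_r50-d32_8xb2-160k_ade20k-512x512.py
#
#   # 8 GPUs, matching the batch-size-2-per-GPU note above
#   bash tools/dist_train.sh configs/maskformer/maskformer_r50-d32_8xb2-160k_ade20k-512x512.py 8
#
# The model can also be built programmatically for inspection (sketch, assuming
# mmsegmentation 1.x with mmengine and mmdet installed):
#
#   from mmengine.config import Config
#   from mmseg.registry import MODELS
#   from mmseg.utils import register_all_modules
#
#   register_all_modules()
#   cfg = Config.fromfile('path/to/this_config.py')  # path is a placeholder
#   maskformer = MODELS.build(cfg.model)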