# SAN (Side Adapter Network) config: CLIP ViT-L/14 @ 336 px backbone,
# COCO-Stuff 164k, 640x640 crops.
# Inherit dataset, schedule, and all default SAN settings from the ViT-B/16
# COCO-Stuff 164k config; only the backbone-size-dependent fields are
# overridden below for the larger ViT-L/14 (336 px) CLIP model.
_base_ = ['./san-vit-b16_coco-stuff164k-640x640.py']
# OpenMMLab-hosted CLIP ViT-L/14-336 checkpoint used to initialize the model.
pretrained = 'https://download.openmmlab.com/mmsegmentation/v0.5/san/clip_vit-large-patch14-336_3rdparty-0b5df9cb.pth' # noqa
model = dict(
    type='MultimodalEncoderDecoder',
    pretrained=pretrained,
    # Resize factor applied before the CLIP image encoder.
    # NOTE(review): presumably chosen so the encoder input matches the
    # 336 px CLIP pretraining resolution — confirm against the base config.
    encoder_resolution=0.7,
    image_encoder=dict(
        type='VisionTransformer',
        img_size=(336, 336),
        patch_size=14,
        patch_pad=0,
        # CLIP ViT-L dimensions: 1024-d tokens, 16 attention heads.
        embed_dims=1024,
        # NOTE(review): 18 is fewer than the 24 layers of a full ViT-L;
        # presumably the remaining CLIP layers are applied inside the
        # decode head (SAN's deep-fusion design) — confirm.
        num_layers=18,
        num_heads=16,
        # Intermediate feature levels exposed to the decode head.
        out_indices=(5, 11, 17),
    ),
    text_encoder=dict(
        type='CLIPTextEncoder',
        # CLIP ViT-L text tower: width 768, 12 layers, 12 heads,
        # projected to 768-d joint embedding space.
        embed_dims=768,
        num_layers=12,
        num_heads=12,
        output_dims=768,
    ),
    decode_head=dict(
        type='SideAdapterCLIPHead',
        # clip_channels matches the image encoder's embed_dims (1024).
        san_cfg=dict(clip_channels=1024, cfg_decoder=dict(num_heads=16)),
        maskgen_cfg=dict(
            num_layers=6,
            embed_dims=1024,
            num_heads=16,
            # out_dims matches text_encoder.output_dims (768), presumably so
            # generated mask embeddings are comparable with text embeddings.
            out_dims=768,
        )))
# By default, models are trained on 8 GPUs with 4 images per GPU
train_dataloader = dict(batch_size=4)