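# Spaces: Runtime error
# (Page-status text that was captured along with this config; it appears to be
# unrelated to the configuration below.)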
# Base config this file builds on; judging by its name, the ViT-B/16 SAN setup
# for Pascal Context at 640x640. The `model` dict below supplies the values
# that differ for this variant.
_base_ = ['./san-vit-b16_pascal_context-640x640.py']

model = dict(
    type='MultimodalEncoderDecoder',
    # NOTE(review): the checkpoint filename reads like a ViT-Base/16 224px
    # weight file, while the image encoder below is configured with
    # patch_size=14, embed_dims=1024 and img_size=(336, 336) -- confirm this
    # is the intended checkpoint.
    pretrained='pretrain/jx_vit_base_p16_224-80ecf9dd.pth',
    encoder_resolution=0.7,
    # Image branch: 18 transformer layers, 16 heads, 1024-dim embeddings.
    image_encoder=dict(
        type='VisionTransformer',
        img_size=(336, 336),
        patch_size=14,
        patch_pad=0,
        embed_dims=1024,
        num_layers=18,
        num_heads=16,
        # Three taps, evenly spaced every 6 layers, ending at the last
        # configured layer (index 17 of 18).
        out_indices=(5, 11, 17),
    ),
    # Text branch: 12 layers, 12 heads, 768-dim embeddings and outputs.
    text_encoder=dict(
        type='CLIPTextEncoder',
        embed_dims=768,
        num_layers=12,
        num_heads=12,
        output_dims=768,
    ),
    decode_head=dict(
        type='SideAdapterCLIPHead',
        # clip_channels equals the image encoder's embed_dims (1024).
        san_cfg=dict(clip_channels=1024, cfg_decoder=dict(num_heads=16)),
        # embed_dims / num_heads equal the image encoder's (1024 / 16);
        # out_dims equals the text encoder's output_dims (768).
        maskgen_cfg=dict(
            num_layers=6,
            embed_dims=1024,
            num_heads=16,
            out_dims=768,
        ),
    ),
)