{
  "adaptors_kwargs": {
    "flow": {
      "class": "FlowAdaptor",
      "kwargs": {
        "base_shape": [
          224,
          224
        ],
        "flow_mean": [
          0.0,
          0.0
        ],
        "flow_std": [
          25,
          25
        ],
        "name": "flow",
        "output_normalized_coordinate": false,
        "scale_strategy": "scale_both"
      }
    }
  },
  "detach_uncertainty_head": false,
  "encoder_kwargs": {
    "data_norm_type": "dinov2",
    "name": "dinov2_encoder",
    "patch_size": 14,
    "size": "large",
    "with_registers": false
  },
  "encoder_str": "dinov2",
  "feature_head_kwargs": {
    "dpt_feature": {
      "feature_dim": 256,
      "hooks": [
        0,
        1,
        2,
        3
      ],
      "input_feature_dims": [
        1024,
        768,
        768,
        768
      ],
      "layer_dims": [
        96,
        192,
        384,
        768
      ],
      "output_width_ratio": 1,
      "patch_size": 14,
      "pretrained_checkpoint_path": null,
      "use_bn": false
    },
    "dpt_processor": {
      "hidden_dims": [
        128,
        128
      ],
      "input_feature_dim": 256,
      "output_dim": 2,
      "pretrained_checkpoint_path": null
    }
  },
  "head_type": "dpt",
  "inference_resolution": [
    560,
    420
  ],
  "info_sharing_and_head_structure": "dual+single",
  "info_sharing_kwargs": {
    "attn_drop": 0.0,
    "custom_positional_encoding": null,
    "depth": 12,
    "dim": 768,
    "drop_path": 0.0,
    "gradient_checkpointing": false,
    "indices": [
      5,
      8
    ],
    "init_values": null,
    "input_embed_dim": 1024,
    "max_num_views": 2,
    "mlp_ratio": 4.0,
    "name": "global_attention",
    "norm_intermediate": true,
    "num_heads": 12,
    "pretrained_checkpoint_path": null,
    "proj_drop": 0.0,
    "qk_norm": false,
    "qkv_bias": true,
    "size": "base",
    "use_rand_idx_pe_for_non_reference_views": false
  },
  "info_sharing_str": "global_attention",
  "pretrained_backbone_checkpoint_path": null,
  "pretrained_checkpoint_path": null,
  "uncertainty_adaptors_kwargs": {
    "non_occluded_mask": {
      "class": "MaskAdaptor",
      "kwargs": {
        "name": "non_occluded_mask"
      }
    }
  },
  "uncertainty_head_kwargs": {
    "dpt_feature": {
      "feature_dim": 256,
      "hooks": [
        0,
        1,
        2,
        3
      ],
      "input_feature_dims": [
        1024,
        768,
        768,
        768
      ],
      "layer_dims": [
        96,
        192,
        384,
        768
      ],
      "output_width_ratio": 1,
      "patch_size": 14,
      "pretrained_checkpoint_path": null,
      "use_bn": false
    },
    "dpt_processor": {
      "hidden_dims": [
        128,
        128
      ],
      "input_feature_dim": 256,
      "output_dim": 1,
      "pretrained_checkpoint_path": null
    }
  },
  "uncertainty_head_type": "dpt"
}