diff --git a/conf/maplocnet.yaml b/conf/maplocnet.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b900ae970c6b905ff2c826293a17b054fd01b6fa --- /dev/null +++ b/conf/maplocnet.yaml @@ -0,0 +1,105 @@ +data: + root: '/root/autodl-fs/DATASET/MapLocNetDataset/UAV/' + train_citys: + - Paris + - Berlin + - London + - Tokyo + - NewYork + val_citys: +# - Taipei +# - LosAngeles +# - Singapore + - SanFrancisco + test_citys: + - SanFrancisco + image_size: 256 + train: + batch_size: 12 + num_workers: 4 + val: + batch_size: ${..train.batch_size} + num_workers: ${.batch_size} + num_classes: + areas: 7 + ways: 10 + nodes: 33 + pixel_per_meter: 1 + crop_size_meters: 64 + max_init_error: 48 + add_map_mask: true + resize_image: 512 + pad_to_square: true + rectify_pitch: true + augmentation: + rot90: true + flip: true + image: + apply: true + brightness: 0.5 + contrast: 0.4 + saturation: 0.4 + hue": 0.5/3.14 +model: + image_size: ${data.image_size} + latent_dim: 128 + val_citys: ${data.val_citys} + image_encoder: + name: feature_extractor_v2 + backbone: + encoder: resnet50 + pretrained: true + output_dim: 8 + num_downsample: null + remove_stride_from_first_conv: false + name: maplocnet + matching_dim: 8 + z_max: 32 + x_max: 32 + pixel_per_meter: 1 + num_scale_bins: 33 + num_rotations: 64 + map_encoder: + embedding_dim: 16 + output_dim: 8 + num_classes: + areas: 7 + ways: 10 + nodes: 33 + backbone: + encoder: vgg19 + pretrained: false + output_scales: + - 0 + num_downsample: 3 + decoder: + - 128 + - 64 + - 64 + padding: replicate + unary_prior: false + bev_net: + num_blocks: 4 + latent_dim: 128 + output_dim: 8 + confidence: true +experiment: + name: maplocanet_0526_re + gpus: 2 + seed: 0 +training: + lr: 0.0001 + lr_scheduler: null + finetune_from_checkpoint: null + trainer: + val_check_interval: 1000 + log_every_n_steps: 100 +# limit_val_batches: 1000 + max_steps: 200000 + devices: ${experiment.gpus} + checkpointing: + monitor: "loss/total/val" + save_top_k: 10 + mode: min + +# filename: '{epoch}-{step}-{loss_SanFrancisco:.2f}' \ No newline at end of file diff --git a/conf/maplocnetsingle-101.yaml b/conf/maplocnetsingle-101.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3dfb29c2f3ffc7790681a7c2866d47b68828a3f6 --- /dev/null +++ b/conf/maplocnetsingle-101.yaml @@ -0,0 +1,105 @@ +data: + root: '/root/autodl-fs/DATASET/MapLocNetDataset/UAV/' + train_citys: + - Paris + - Berlin + - London + - Tokyo + - NewYork + val_citys: +# - Taipei +# - LosAngeles +# - Singapore + - SanFrancisco + test_citys: + - SanFrancisco + image_size: 256 + train: + batch_size: 12 + num_workers: 4 + val: + batch_size: ${..train.batch_size} + num_workers: ${.batch_size} + num_classes: + areas: 7 + ways: 10 + nodes: 33 + pixel_per_meter: 1 + crop_size_meters: 64 + max_init_error: 48 + add_map_mask: true + resize_image: 512 + pad_to_square: true + rectify_pitch: true + augmentation: + rot90: true + flip: true + image: + apply: True + brightness: 0.5 + contrast: 0.4 + saturation: 0.4 + hue": 0.5/3.14 +model: + image_size: ${data.image_size} + latent_dim: 128 + val_citys: ${data.val_citys} + image_encoder: + name: feature_extractor_v2 + backbone: + encoder: resnet101 + pretrained: true + output_dim: 8 + num_downsample: null + remove_stride_from_first_conv: false + name: maplocnet + matching_dim: 8 + z_max: 32 + x_max: 32 + pixel_per_meter: 1 + num_scale_bins: 33 + num_rotations: 64 + map_encoder: + embedding_dim: 48 + output_dim: 8 + num_classes: + areas: 7 + ways: 10 + nodes: 33 + backbone: 
+ encoder: vgg19 + pretrained: false + output_scales: + - 0 + num_downsample: 3 + decoder: + - 128 + - 64 + - 64 + padding: replicate + unary_prior: false + bev_net: + num_blocks: 4 + latent_dim: 128 + output_dim: 8 + confidence: true +experiment: + name: maplocanet_523_single_A100_no_mutil_scale_augmentation_resnet101_nosingle + gpus: 2 + seed: 0 +training: + lr: 0.0001 + lr_scheduler: null + finetune_from_checkpoint: null + trainer: + val_check_interval: 1000 + log_every_n_steps: 100 +# limit_val_batches: 1000 + max_steps: 200000 + devices: ${experiment.gpus} + checkpointing: + monitor: "val/xy_recall_1m" + save_top_k: 10 + mode: min + +# filename: '{epoch}-{step}-{loss_SanFrancisco:.2f}' \ No newline at end of file diff --git a/conf/maplocnetsingle.yaml b/conf/maplocnetsingle.yaml new file mode 100644 index 0000000000000000000000000000000000000000..de237a27e841a36b3a60475441a72f5286a0362c --- /dev/null +++ b/conf/maplocnetsingle.yaml @@ -0,0 +1,105 @@ +data: + root: '/root/autodl-fs/DATASET/MapLocNetDataset/UAV/' + train_citys: + - Paris + - Berlin + - London + - Tokyo + - NewYork + val_citys: +# - Taipei +# - LosAngeles +# - Singapore + - SanFrancisco + test_citys: + - SanFrancisco + image_size: 256 + train: + batch_size: 12 + num_workers: 4 + val: + batch_size: ${..train.batch_size} + num_workers: ${.batch_size} + num_classes: + areas: 7 + ways: 10 + nodes: 33 + pixel_per_meter: 1 + crop_size_meters: 64 + max_init_error: 48 + add_map_mask: true + resize_image: 512 + pad_to_square: true + rectify_pitch: true + augmentation: + rot90: true + flip: true + image: + apply: True + brightness: 0.5 + contrast: 0.4 + saturation: 0.4 + hue": 0.5/3.14 +model: + image_size: ${data.image_size} + latent_dim: 128 + val_citys: ${data.val_citys} + image_encoder: + name: feature_extractor_v2 + backbone: + encoder: resnet101 + pretrained: true + output_dim: 8 + num_downsample: null + remove_stride_from_first_conv: false + name: maplocnet + matching_dim: 8 + z_max: 32 + x_max: 32 + pixel_per_meter: 1 + num_scale_bins: 33 + num_rotations: 64 + map_encoder: + embedding_dim: 48 + output_dim: 8 + num_classes: + all: 50 + # ways: 10 + # nodes: 33 + backbone: + encoder: vgg19 + pretrained: false + output_scales: + - 0 + num_downsample: 3 + decoder: + - 128 + - 64 + - 64 + padding: replicate + unary_prior: false + bev_net: + num_blocks: 4 + latent_dim: 128 + output_dim: 8 + confidence: true +experiment: + name: maplocanet_523_single_A100_no_mutil_scale_augmentation_resnet101_2 + gpus: 2 + seed: 0 +training: + lr: 0.0001 + lr_scheduler: null + finetune_from_checkpoint: null + trainer: + val_check_interval: 1000 + log_every_n_steps: 100 +# limit_val_batches: 1000 + max_steps: 200000 + devices: ${experiment.gpus} + checkpointing: + monitor: "val/xy_recall_1m" + save_top_k: 10 + mode: min + +# filename: '{epoch}-{step}-{loss_SanFrancisco:.2f}' \ No newline at end of file diff --git a/conf/maplocnetsingle0526.yaml b/conf/maplocnetsingle0526.yaml new file mode 100644 index 0000000000000000000000000000000000000000..207741912ee75df3502605906b3ccc6c5c62e9ad --- /dev/null +++ b/conf/maplocnetsingle0526.yaml @@ -0,0 +1,105 @@ +data: + root: '/root/autodl-fs/DATASET/MapLocNetDataset/UAV/' + train_citys: + - Paris + - Berlin + - London + - Tokyo + - NewYork + val_citys: +# - Taipei +# - LosAngeles +# - Singapore + - SanFrancisco + test_citys: + - SanFrancisco + image_size: 256 + train: + batch_size: 12 + num_workers: 4 + val: + batch_size: ${..train.batch_size} + num_workers: ${.batch_size} + num_classes: + areas: 7 + ways: 
10 + nodes: 33 + pixel_per_meter: 1 + crop_size_meters: 64 + max_init_error: 48 + add_map_mask: true + resize_image: 512 + pad_to_square: true + rectify_pitch: true + augmentation: + rot90: true + flip: true + image: + apply: false + brightness: 0.5 + contrast: 0.4 + saturation: 0.4 + hue": 0.5/3.14 +model: + image_size: ${data.image_size} + latent_dim: 128 + val_citys: ${data.val_citys} + image_encoder: + name: feature_extractor_v2 + backbone: + encoder: resnet50 + pretrained: true + output_dim: 8 + num_downsample: null + remove_stride_from_first_conv: false + name: maplocnet + matching_dim: 8 + z_max: 32 + x_max: 32 + pixel_per_meter: 1 + num_scale_bins: 33 + num_rotations: 64 + map_encoder: + embedding_dim: 48 + output_dim: 8 + num_classes: + all: 50 + # ways: 10 + # nodes: 33 + backbone: + encoder: vgg19 + pretrained: false + output_scales: + - 0 + num_downsample: 3 + decoder: + - 128 + - 64 + - 64 + padding: replicate + unary_prior: false + bev_net: + num_blocks: 4 + latent_dim: 128 + output_dim: 8 + confidence: true +experiment: + name: maplocanet_523_single_A100_no_mutil_scale + gpus: 2 + seed: 0 +training: + lr: 0.0001 + lr_scheduler: null + finetune_from_checkpoint: null + trainer: + val_check_interval: 1000 + log_every_n_steps: 100 +# limit_val_batches: 1000 + max_steps: 200000 + devices: ${experiment.gpus} + checkpointing: + monitor: "val/xy_recall_1m" + save_top_k: 10 + mode: min + +# filename: '{epoch}-{step}-{loss_SanFrancisco:.2f}' \ No newline at end of file diff --git a/conf/maplocnetsingleunet.yaml b/conf/maplocnetsingleunet.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1744f7cfe38f4e0854d1fd24440910fc4dc2a055 --- /dev/null +++ b/conf/maplocnetsingleunet.yaml @@ -0,0 +1,105 @@ +data: + root: '/root/autodl-fs/DATASET/MapLocNetDataset/UAV/' + train_citys: + - Paris + - Berlin + - London + - Tokyo + - NewYork + val_citys: +# - Taipei +# - LosAngeles +# - Singapore + - SanFrancisco + test_citys: + - SanFrancisco + image_size: 256 + train: + batch_size: 12 + num_workers: 4 + val: + batch_size: ${..train.batch_size} + num_workers: ${.batch_size} + num_classes: + areas: 7 + ways: 10 + nodes: 33 + pixel_per_meter: 1 + crop_size_meters: 64 + max_init_error: 48 + add_map_mask: true + resize_image: 512 + pad_to_square: true + rectify_pitch: true + augmentation: + rot90: true + flip: true + image: + apply: True + brightness: 0.5 + contrast: 0.4 + saturation: 0.4 + hue": 0.5/3.14 +model: + image_size: ${data.image_size} + latent_dim: 128 + val_citys: ${data.val_citys} + image_encoder: + name: feature_extractor_v3 + backbone: +# encoder: resnet101 +# pretrained: true + output_dim: 8 +# num_downsample: null +# remove_stride_from_first_conv: false + name: maplocnet + matching_dim: 8 + z_max: 32 + x_max: 32 + pixel_per_meter: 1 + num_scale_bins: 33 + num_rotations: 64 + map_encoder: + embedding_dim: 48 + output_dim: 8 + num_classes: + all: 50 + # ways: 10 + # nodes: 33 + backbone: + encoder: vgg19 + pretrained: false + output_scales: + - 0 + num_downsample: 3 + decoder: + - 128 + - 64 + - 64 + padding: replicate + unary_prior: false + bev_net: + num_blocks: 4 + latent_dim: 128 + output_dim: 8 + confidence: true +experiment: + name: maplocanet_601_unet + gpus: 2 + seed: 0 +training: + lr: 0.0001 + lr_scheduler: null + finetune_from_checkpoint: null + trainer: + val_check_interval: 1000 + log_every_n_steps: 100 +# limit_val_batches: 1000 + max_steps: 200000 + devices: ${experiment.gpus} + checkpointing: + monitor: "val/xy_recall_1m" + save_top_k: 10 + mode: min + 
+# filename: '{epoch}-{step}-{loss_SanFrancisco:.2f}' \ No newline at end of file diff --git a/conf/maplocnetsinglhub_DDRNet.yaml b/conf/maplocnetsinglhub_DDRNet.yaml new file mode 100644 index 0000000000000000000000000000000000000000..87b66a5494f310f9a61f2f0b57f437abde85246a --- /dev/null +++ b/conf/maplocnetsinglhub_DDRNet.yaml @@ -0,0 +1,107 @@ +data: + root: '/root/autodl-fs/DATASET/MapLocNetDataset/UAV/' + train_citys: + - Paris + - Berlin + - London + - Tokyo + - NewYork + val_citys: +# - Taipei +# - LosAngeles +# - Singapore + - SanFrancisco + test_citys: + - SanFrancisco + image_size: 256 + train: + batch_size: 12 + num_workers: 4 + val: + batch_size: ${..train.batch_size} + num_workers: ${.batch_size} + num_classes: + areas: 7 + ways: 10 + nodes: 33 + pixel_per_meter: 1 + crop_size_meters: 64 + max_init_error: 48 + add_map_mask: true + resize_image: 512 + pad_to_square: true + rectify_pitch: true + augmentation: + rot90: true + flip: true + image: + apply: True + brightness: 0.5 + contrast: 0.4 + saturation: 0.4 + hue": 0.5/3.14 +model: + image_size: ${data.image_size} + latent_dim: 128 + val_citys: ${data.val_citys} + image_encoder: + name: feature_extractor_v5 + architecture: DDRNet23s + backbone: +# encoder: resnet50 +# pretrained: true + output_dim: 8 +# upsampling: 2 +# num_downsample: null +# remove_stride_from_first_conv: false + name: maplocnet + matching_dim: 8 + z_max: 32 + x_max: 32 + pixel_per_meter: 1 + num_scale_bins: 33 + num_rotations: 64 + map_encoder: + embedding_dim: 48 + output_dim: 8 + num_classes: + all: 50 + # ways: 10 + # nodes: 33 + backbone: + encoder: vgg19 + pretrained: false + output_scales: + - 0 + num_downsample: 3 + decoder: + - 128 + - 64 + - 64 + padding: replicate + unary_prior: false + bev_net: + num_blocks: 4 + latent_dim: 128 + output_dim: 8 + confidence: true +experiment: + name: maplocanet_602_hub_DDRnet + gpus: 2 + seed: 0 +training: + lr: 0.0001 + lr_scheduler: null + finetune_from_checkpoint: null + trainer: + val_check_interval: 1000 + log_every_n_steps: 100 +# limit_val_batches: 1000 + max_steps: 200000 + devices: ${experiment.gpus} + checkpointing: + monitor: "val/xy_recall_1m" + save_top_k: 5 + mode: max + +# filename: '{epoch}-{step}-{loss_SanFrancisco:.2f}' \ No newline at end of file diff --git a/conf/maplocnetsinglhub_FPN-resnet18WeightedEmbedding.yaml b/conf/maplocnetsinglhub_FPN-resnet18WeightedEmbedding.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ddd598b688454fb4f8a0adce8802799c9b95aea1 --- /dev/null +++ b/conf/maplocnetsinglhub_FPN-resnet18WeightedEmbedding.yaml @@ -0,0 +1,112 @@ +data: + root: '/root/autodl-fs/DATASET/MapLocNetDataset/UAV/' + train_citys: + - Paris + - Berlin + - London + - Tokyo + - NewYork + val_citys: +# - Taipei +# - LosAngeles +# - Singapore + - SanFrancisco + test_citys: + - SanFrancisco + image_size: 256 + train: + batch_size: 12 + num_workers: 4 + val: + batch_size: ${..train.batch_size} + num_workers: ${.batch_size} + num_classes: + areas: 7 + ways: 10 + nodes: 33 + pixel_per_meter: 1 + crop_size_meters: 64 + max_init_error: 48 + add_map_mask: true + resize_image: 512 + pad_to_square: true + rectify_pitch: true + augmentation: + rot90: false + flip: false + image: + apply: True + brightness: 0.5 + contrast: 0.4 + saturation: 0.4 + hue": 0.5/3.14 +model: + image_size: ${data.image_size} + latent_dim: 128 + val_citys: ${data.val_citys} + image_encoder: + name: feature_extractor_v4 + architecture: FPN + backbone: + encoder: resnet18 +# pretrained: true + output_dim: 8 +# 
upsampling: 2 +# num_downsample: null +# remove_stride_from_first_conv: false + name: maplocnet + matching_dim: 8 + z_max: 32 + x_max: 32 + pixel_per_meter: 1 + num_scale_bins: 33 + num_rotations: 64 + map_encoder: + embedding_dim: 48 + output_dim: 8 + weighted_embedding: ImprovedAttentionEmbedding + num_classes: + all: 50 + # ways: 10 + # nodes: 33 + backbone: + encoder: vgg19 + pretrained: false + output_scales: + - 0 + num_downsample: 3 + decoder: + - 128 + - 64 + - 64 + padding: replicate + unary_prior: false + bev_net: + num_blocks: 4 + latent_dim: 128 + output_dim: 8 + confidence: true +experiment: + name: maplocanet_602_hub_FPN_norelu_resnet18_ImprovedAttentionEmbedding + gpus: 5 + seed: 42 +training: + lr: 0.0001 + lr_scheduler: + name: StepLR + args: + step_size: 10 + gamma: 0.1 + finetune_from_checkpoint: null + trainer: + val_check_interval: 1000 + log_every_n_steps: 100 +# limit_val_batches: 1000 + max_steps: 300000 + devices: ${experiment.gpus} + checkpointing: + monitor: "val/xy_recall_1m" + save_top_k: 5 + mode: max + +# filename: '{epoch}-{step}-{loss_SanFrancisco:.2f}' \ No newline at end of file diff --git a/conf/maplocnetsinglhub_FPN-resnet34LightWeightedEmbedding.yaml b/conf/maplocnetsinglhub_FPN-resnet34LightWeightedEmbedding.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f6e944e54af3f70bdbddff1a981784e12d6e9d90 --- /dev/null +++ b/conf/maplocnetsinglhub_FPN-resnet34LightWeightedEmbedding.yaml @@ -0,0 +1,112 @@ +data: + root: '/root/autodl-fs/DATASET/MapLocNetDataset/UAV/' + train_citys: + - Paris + - Berlin + - London + - Tokyo + - NewYork + val_citys: +# - Taipei +# - LosAngeles +# - Singapore + - SanFrancisco + test_citys: + - SanFrancisco + image_size: 256 + train: + batch_size: 12 + num_workers: 4 + val: + batch_size: ${..train.batch_size} + num_workers: ${.batch_size} + num_classes: + areas: 7 + ways: 10 + nodes: 33 + pixel_per_meter: 1 + crop_size_meters: 64 + max_init_error: 48 + add_map_mask: true + resize_image: 512 + pad_to_square: true + rectify_pitch: true + augmentation: + rot90: false + flip: false + image: + apply: True + brightness: 0.5 + contrast: 0.4 + saturation: 0.4 + hue": 0.5/3.14 +model: + image_size: ${data.image_size} + latent_dim: 128 + val_citys: ${data.val_citys} + image_encoder: + name: feature_extractor_v4 + architecture: LightFPN + backbone: + encoder: resnet34 +# pretrained: true + output_dim: 8 +# upsampling: 2 +# num_downsample: null +# remove_stride_from_first_conv: false + name: maplocnet + matching_dim: 8 + z_max: 32 + x_max: 32 + pixel_per_meter: 1 + num_scale_bins: 33 + num_rotations: 64 + map_encoder: + embedding_dim: 48 + output_dim: 8 + weighted_embedding: ImprovedAttentionEmbedding + num_classes: + all: 50 + # ways: 10 + # nodes: 33 + backbone: + encoder: vgg19 + pretrained: false + output_scales: + - 0 + num_downsample: 3 + decoder: + - 128 + - 64 + - 64 + padding: replicate + unary_prior: false + bev_net: + num_blocks: 4 + latent_dim: 128 + output_dim: 8 + confidence: true +experiment: + name: maplocanet_602_hub_FPN_norelu_resnet34Light_ImprovedAttentionEmbedding + gpus: 5 + seed: 42 +training: + lr: 0.0001 + lr_scheduler: + name: StepLR + args: + step_size: 10 + gamma: 0.1 + finetune_from_checkpoint: null + trainer: + val_check_interval: 1000 + log_every_n_steps: 100 +# limit_val_batches: 1000 + max_steps: 300000 + devices: ${experiment.gpus} + checkpointing: + monitor: "val/xy_recall_1m" + save_top_k: 30 + mode: max + +# filename: '{epoch}-{step}-{loss_SanFrancisco:.2f}' \ No newline at end of 
file diff --git a/conf/maplocnetsinglhub_FPN-resnet34WeightedEmbedding.yaml b/conf/maplocnetsinglhub_FPN-resnet34WeightedEmbedding.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b1c24d02cb2420f0167ed4e1b2514bf429a2186d --- /dev/null +++ b/conf/maplocnetsinglhub_FPN-resnet34WeightedEmbedding.yaml @@ -0,0 +1,112 @@ +data: + root: '/root/autodl-fs/DATASET/MapLocNetDataset/UAV/' + train_citys: + - Paris + - Berlin + - London + - Tokyo + - NewYork + val_citys: +# - Taipei +# - LosAngeles +# - Singapore + - SanFrancisco + test_citys: + - SanFrancisco + image_size: 256 + train: + batch_size: 12 + num_workers: 4 + val: + batch_size: ${..train.batch_size} + num_workers: ${.batch_size} + num_classes: + areas: 7 + ways: 10 + nodes: 33 + pixel_per_meter: 1 + crop_size_meters: 64 + max_init_error: 48 + add_map_mask: true + resize_image: 512 + pad_to_square: true + rectify_pitch: true + augmentation: + rot90: false + flip: false + image: + apply: True + brightness: 0.5 + contrast: 0.4 + saturation: 0.4 + hue": 0.5/3.14 +model: + image_size: ${data.image_size} + latent_dim: 128 + val_citys: ${data.val_citys} + image_encoder: + name: feature_extractor_v4 + architecture: FPN + backbone: + encoder: resnet34 +# pretrained: true + output_dim: 8 +# upsampling: 2 +# num_downsample: null +# remove_stride_from_first_conv: false + name: maplocnet + matching_dim: 8 + z_max: 32 + x_max: 32 + pixel_per_meter: 1 + num_scale_bins: 33 + num_rotations: 64 + map_encoder: + embedding_dim: 48 + output_dim: 8 + weighted_embedding: ImprovedAttentionEmbedding + num_classes: + all: 50 + # ways: 10 + # nodes: 33 + backbone: + encoder: vgg19 + pretrained: false + output_scales: + - 0 + num_downsample: 3 + decoder: + - 128 + - 64 + - 64 + padding: replicate + unary_prior: false + bev_net: + num_blocks: 4 + latent_dim: 128 + output_dim: 8 + confidence: true +experiment: + name: maplocanet_602_hub_FPN_norelu_resnet34_ImprovedAttentionEmbedding + gpus: 5 + seed: 42 +training: + lr: 0.0001 + lr_scheduler: + name: StepLR + args: + step_size: 10 + gamma: 0.1 + finetune_from_checkpoint: null + trainer: + val_check_interval: 1000 + log_every_n_steps: 100 +# limit_val_batches: 1000 + max_steps: 300000 + devices: ${experiment.gpus} + checkpointing: + monitor: "val/xy_recall_1m" + save_top_k: 5 + mode: max + +# filename: '{epoch}-{step}-{loss_SanFrancisco:.2f}' \ No newline at end of file diff --git a/conf/maplocnetsinglhub_FPN-resnet50.yaml b/conf/maplocnetsinglhub_FPN-resnet50.yaml new file mode 100644 index 0000000000000000000000000000000000000000..03b7de5dfa0992a76135a3767232660c2782e081 --- /dev/null +++ b/conf/maplocnetsinglhub_FPN-resnet50.yaml @@ -0,0 +1,111 @@ +data: + root: '/root/autodl-fs/DATASET/MapLocNetDataset/UAV/' + train_citys: + - Paris + - Berlin + - London + - Tokyo + - NewYork + val_citys: +# - Taipei +# - LosAngeles +# - Singapore + - SanFrancisco + test_citys: + - SanFrancisco + image_size: 256 + train: + batch_size: 12 + num_workers: 4 + val: + batch_size: ${..train.batch_size} + num_workers: ${.batch_size} + num_classes: + areas: 7 + ways: 10 + nodes: 33 + pixel_per_meter: 1 + crop_size_meters: 64 + max_init_error: 48 + add_map_mask: true + resize_image: 512 + pad_to_square: true + rectify_pitch: true + augmentation: + rot90: false + flip: false + image: + apply: True + brightness: 0.5 + contrast: 0.4 + saturation: 0.4 + hue": 0.5/3.14 +model: + image_size: ${data.image_size} + latent_dim: 128 + val_citys: ${data.val_citys} + image_encoder: + name: feature_extractor_v4 + architecture: FPN 
+ backbone: + encoder: resnet50 +# pretrained: true + output_dim: 8 +# upsampling: 2 +# num_downsample: null +# remove_stride_from_first_conv: false + name: maplocnet + matching_dim: 8 + z_max: 32 + x_max: 32 + pixel_per_meter: 1 + num_scale_bins: 33 + num_rotations: 64 + map_encoder: + embedding_dim: 48 + output_dim: 8 + num_classes: + all: 50 + # ways: 10 + # nodes: 33 + backbone: + encoder: vgg19 + pretrained: false + output_scales: + - 0 + num_downsample: 3 + decoder: + - 128 + - 64 + - 64 + padding: replicate + unary_prior: false + bev_net: + num_blocks: 4 + latent_dim: 128 + output_dim: 8 + confidence: true +experiment: + name: maplocanet_602_hub_FPN_norelu_resnet50_temp + gpus: 2 + seed: 42 +training: + lr: 0.0001 + lr_scheduler: + name: StepLR + args: + step_size: 10 + gamma: 0.1 + finetune_from_checkpoint: null + trainer: + val_check_interval: 1000 + log_every_n_steps: 100 +# limit_val_batches: 1000 + max_steps: 300000 + devices: ${experiment.gpus} + checkpointing: + monitor: "val/xy_recall_1m" + save_top_k: 5 + mode: max + +# filename: '{epoch}-{step}-{loss_SanFrancisco:.2f}' \ No newline at end of file diff --git a/conf/maplocnetsinglhub_FPN-resnet50WeightedEmbedding.yaml b/conf/maplocnetsinglhub_FPN-resnet50WeightedEmbedding.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c6b12d18c3a68c318ec15b9b786bd153626fbcf0 --- /dev/null +++ b/conf/maplocnetsinglhub_FPN-resnet50WeightedEmbedding.yaml @@ -0,0 +1,112 @@ +data: + root: '/root/autodl-fs/DATASET/MapLocNetDataset/UAV/' + train_citys: + - Paris + - Berlin + - London + - Tokyo + - NewYork + val_citys: +# - Taipei +# - LosAngeles +# - Singapore + - SanFrancisco + test_citys: + - SanFrancisco + image_size: 256 + train: + batch_size: 12 + num_workers: 4 + val: + batch_size: ${..train.batch_size} + num_workers: ${.batch_size} + num_classes: + areas: 7 + ways: 10 + nodes: 33 + pixel_per_meter: 1 + crop_size_meters: 64 + max_init_error: 48 + add_map_mask: true + resize_image: 512 + pad_to_square: true + rectify_pitch: true + augmentation: + rot90: false + flip: false + image: + apply: True + brightness: 0.5 + contrast: 0.4 + saturation: 0.4 + hue": 0.5/3.14 +model: + image_size: ${data.image_size} + latent_dim: 128 + val_citys: ${data.val_citys} + image_encoder: + name: feature_extractor_v4 + architecture: FPN + backbone: + encoder: resnet50 +# pretrained: true + output_dim: 8 +# upsampling: 2 +# num_downsample: null +# remove_stride_from_first_conv: false + name: maplocnet + matching_dim: 8 + z_max: 32 + x_max: 32 + pixel_per_meter: 1 + num_scale_bins: 33 + num_rotations: 64 + map_encoder: + embedding_dim: 48 + output_dim: 8 + weighted_embedding: ImprovedAttentionEmbedding + num_classes: + all: 50 + # ways: 10 + # nodes: 33 + backbone: + encoder: vgg19 + pretrained: false + output_scales: + - 0 + num_downsample: 3 + decoder: + - 128 + - 64 + - 64 + padding: replicate + unary_prior: false + bev_net: + num_blocks: 4 + latent_dim: 128 + output_dim: 8 + confidence: true +experiment: + name: maplocanet_602_hub_FPN_norelu_resnet50_ImprovedAttentionEmbedding + gpus: 3 + seed: 42 +training: + lr: 0.0001 + lr_scheduler: + name: StepLR + args: + step_size: 10 + gamma: 0.1 + finetune_from_checkpoint: null + trainer: + val_check_interval: 1000 + log_every_n_steps: 100 +# limit_val_batches: 1000 + max_steps: 300000 + devices: ${experiment.gpus} + checkpointing: + monitor: "val/xy_recall_1m" + save_top_k: 5 + mode: max + +# filename: '{epoch}-{step}-{loss_SanFrancisco:.2f}' \ No newline at end of file diff --git 
a/conf/maplocnetsinglhub_FPN.yaml b/conf/maplocnetsinglhub_FPN.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7cb5481c408045c7d52d17e48eed4f75effef799 --- /dev/null +++ b/conf/maplocnetsinglhub_FPN.yaml @@ -0,0 +1,107 @@ +data: + root: '/root/autodl-fs/DATASET/MapLocNetDataset/UAV/' + train_citys: + - Paris + - Berlin + - London + - Tokyo + - NewYork + val_citys: +# - Taipei +# - LosAngeles +# - Singapore + - SanFrancisco + test_citys: + - SanFrancisco + image_size: 256 + train: + batch_size: 12 + num_workers: 4 + val: + batch_size: ${..train.batch_size} + num_workers: ${.batch_size} + num_classes: + areas: 7 + ways: 10 + nodes: 33 + pixel_per_meter: 1 + crop_size_meters: 64 + max_init_error: 48 + add_map_mask: true + resize_image: 512 + pad_to_square: true + rectify_pitch: true + augmentation: + rot90: true + flip: true + image: + apply: True + brightness: 0.5 + contrast: 0.4 + saturation: 0.4 + hue": 0.5/3.14 +model: + image_size: ${data.image_size} + latent_dim: 128 + val_citys: ${data.val_citys} + image_encoder: + name: feature_extractor_v4 + architecture: FPN + backbone: + encoder: resnet101 +# pretrained: true + output_dim: 8 +# upsampling: 2 +# num_downsample: null +# remove_stride_from_first_conv: false + name: maplocnet + matching_dim: 8 + z_max: 32 + x_max: 32 + pixel_per_meter: 1 + num_scale_bins: 33 + num_rotations: 64 + map_encoder: + embedding_dim: 48 + output_dim: 8 + num_classes: + all: 50 + # ways: 10 + # nodes: 33 + backbone: + encoder: vgg19 + pretrained: false + output_scales: + - 0 + num_downsample: 3 + decoder: + - 128 + - 64 + - 64 + padding: replicate + unary_prior: false + bev_net: + num_blocks: 4 + latent_dim: 128 + output_dim: 8 + confidence: true +experiment: + name: maplocanet_602_hub_FPN_Resnet50_norelu + gpus: 2 + seed: 0 +training: + lr: 0.0001 + lr_scheduler: null + finetune_from_checkpoint: null + trainer: + val_check_interval: 1000 + log_every_n_steps: 100 +# limit_val_batches: 1000 + max_steps: 200000 + devices: ${experiment.gpus} + checkpointing: + monitor: "val/xy_recall_1m" + save_top_k: 10 + mode: min + +# filename: '{epoch}-{step}-{loss_SanFrancisco:.2f}' \ No newline at end of file diff --git a/conf/maplocnetsinglhub_FPN_Mobileone.yaml b/conf/maplocnetsinglhub_FPN_Mobileone.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a355e2374fdaa9b42a97d794278d671631d21df5 --- /dev/null +++ b/conf/maplocnetsinglhub_FPN_Mobileone.yaml @@ -0,0 +1,107 @@ +data: + root: '/root/autodl-fs/DATASET/MapLocNetDataset/UAV/' + train_citys: + - Paris + - Berlin + - London + - Tokyo + - NewYork + val_citys: +# - Taipei +# - LosAngeles +# - Singapore + - SanFrancisco + test_citys: + - SanFrancisco + image_size: 256 + train: + batch_size: 12 + num_workers: 4 + val: + batch_size: ${..train.batch_size} + num_workers: ${.batch_size} + num_classes: + areas: 7 + ways: 10 + nodes: 33 + pixel_per_meter: 1 + crop_size_meters: 64 + max_init_error: 48 + add_map_mask: true + resize_image: 512 + pad_to_square: true + rectify_pitch: true + augmentation: + rot90: true + flip: true + image: + apply: True + brightness: 0.5 + contrast: 0.4 + saturation: 0.4 + hue": 0.5/3.14 +model: + image_size: ${data.image_size} + latent_dim: 128 + val_citys: ${data.val_citys} + image_encoder: + name: feature_extractor_v4 + architecture: FPN + backbone: + encoder: mobileone_s3 +# pretrained: true + output_dim: 8 +# upsampling: 2 +# num_downsample: null +# remove_stride_from_first_conv: false + name: maplocnet + matching_dim: 8 + z_max: 32 + x_max: 32 + 
pixel_per_meter: 1 + num_scale_bins: 33 + num_rotations: 64 + map_encoder: + embedding_dim: 48 + output_dim: 8 + num_classes: + all: 50 + # ways: 10 + # nodes: 33 + backbone: + encoder: vgg19 + pretrained: false + output_scales: + - 0 + num_downsample: 3 + decoder: + - 128 + - 64 + - 64 + padding: replicate + unary_prior: false + bev_net: + num_blocks: 4 + latent_dim: 128 + output_dim: 8 + confidence: true +experiment: + name: maplocnetsinglhub_FPN_mobileone_s3 + gpus: 2 + seed: 0 +training: + lr: 0.0001 + lr_scheduler: null + finetune_from_checkpoint: null + trainer: + val_check_interval: 1000 + log_every_n_steps: 100 +# limit_val_batches: 1000 + max_steps: 300000 + devices: ${experiment.gpus} + checkpointing: + monitor: "val/xy_recall_1m" + save_top_k: 10 + mode: max + +# filename: '{epoch}-{step}-{loss_SanFrancisco:.2f}' \ No newline at end of file diff --git a/conf/maplocnetsinglhub_PSP.yaml b/conf/maplocnetsinglhub_PSP.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1547128ac29c8dde636bf437351a5bd7a79a7490 --- /dev/null +++ b/conf/maplocnetsinglhub_PSP.yaml @@ -0,0 +1,107 @@ +data: + root: '/root/autodl-fs/DATASET/MapLocNetDataset/UAV/' + train_citys: + - Paris + - Berlin + - London + - Tokyo + - NewYork + val_citys: +# - Taipei +# - LosAngeles +# - Singapore + - SanFrancisco + test_citys: + - SanFrancisco + image_size: 256 + train: + batch_size: 12 + num_workers: 4 + val: + batch_size: ${..train.batch_size} + num_workers: ${.batch_size} + num_classes: + areas: 7 + ways: 10 + nodes: 33 + pixel_per_meter: 1 + crop_size_meters: 64 + max_init_error: 48 + add_map_mask: true + resize_image: 512 + pad_to_square: true + rectify_pitch: true + augmentation: + rot90: true + flip: true + image: + apply: True + brightness: 0.5 + contrast: 0.4 + saturation: 0.4 + hue": 0.5/3.14 +model: + image_size: ${data.image_size} + latent_dim: 128 + val_citys: ${data.val_citys} + image_encoder: + name: feature_extractor_v4 + architecture: PSP + backbone: + encoder: resnet50 +# pretrained: true + output_dim: 8 +# upsampling: 2 +# num_downsample: null +# remove_stride_from_first_conv: false + name: maplocnet + matching_dim: 8 + z_max: 32 + x_max: 32 + pixel_per_meter: 1 + num_scale_bins: 33 + num_rotations: 64 + map_encoder: + embedding_dim: 48 + output_dim: 8 + num_classes: + all: 50 + # ways: 10 + # nodes: 33 + backbone: + encoder: vgg19 + pretrained: false + output_scales: + - 0 + num_downsample: 3 + decoder: + - 128 + - 64 + - 64 + padding: replicate + unary_prior: false + bev_net: + num_blocks: 4 + latent_dim: 128 + output_dim: 8 + confidence: true +experiment: + name: maplocanet_602_hub_PSP + gpus: 2 + seed: 0 +training: + lr: 0.0001 + lr_scheduler: null + finetune_from_checkpoint: null + trainer: + val_check_interval: 1000 + log_every_n_steps: 100 +# limit_val_batches: 1000 + max_steps: 300000 + devices: ${experiment.gpus} + checkpointing: + monitor: "val/xy_recall_1m" + save_top_k: 5 + mode: max + +# filename: '{epoch}-{step}-{loss_SanFrancisco:.2f}' \ No newline at end of file diff --git a/conf/orienternet.yaml b/conf/orienternet.yaml new file mode 100644 index 0000000000000000000000000000000000000000..165294d6dd165b7e0dc5a814cd554fa5b0347b2e --- /dev/null +++ b/conf/orienternet.yaml @@ -0,0 +1,103 @@ +data: + root: '/home/ubuntu/media/MapLocNetDataset/UAV/' + train_citys: + - Paris + - Berlin + - London + - Tokyo + - NewYork + val_citys: +# - Taipei +# - LosAngeles +# - Singapore + - SanFrancisco + image_size: 256 + train: + batch_size: 12 + num_workers: 4 + val: + 
batch_size: ${..train.batch_size} + num_workers: ${.batch_size} + num_classes: + areas: 7 + ways: 10 + nodes: 33 + pixel_per_meter: 1 + crop_size_meters: 64 + max_init_error: 48 + add_map_mask: true + resize_image: 512 + pad_to_square: true + rectify_pitch: true + augmentation: + rot90: true +# flip: true + image: + apply: true + brightness: 0.5 + contrast: 0.4 + saturation: 0.4 + hue": 0.5/3.14 +model: + image_size: ${data.image_size} + latent_dim: 128 + val_citys: ${data.val_citys} + image_encoder: + name: feature_extractor_v2 + backbone: + encoder: resnet101 + pretrained: true + output_dim: 8 + num_downsample: null + remove_stride_from_first_conv: false + name: orienternet + matching_dim: 8 + z_max: 32 + x_max: 32 + pixel_per_meter: 1 + num_scale_bins: 33 + num_rotations: 64 + map_encoder: + embedding_dim: 16 + output_dim: 8 + num_classes: + areas: 7 + ways: 10 + nodes: 33 + backbone: + encoder: vgg19 + pretrained: false + output_scales: + - 0 + num_downsample: 3 + decoder: + - 128 + - 64 + - 64 + padding: replicate + unary_prior: false + bev_net: + num_blocks: 4 + latent_dim: 128 + output_dim: 8 + confidence: true +experiment: + name: OrienterNet_my_multi_city_debug_code_0815_2_monitor_metric + gpus: 4 + seed: 0 +training: + lr: 0.0001 + lr_scheduler: null + finetune_from_checkpoint: null + trainer: + val_check_interval: 1000 + log_every_n_steps: 100 +# limit_val_batches: 1000 + max_steps: 200000 + devices: ${experiment.gpus} + checkpointing: + monitor: "loss/total/val" + save_top_k: 10 + mode: min + +# filename: '{epoch}-{step}-{loss_SanFrancisco:.2f}' \ No newline at end of file diff --git a/dataset/UAV/dataset.py b/dataset/UAV/dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..1dbc8121bb355dcbee2ed7f2bc68e85e25b2b808 --- /dev/null +++ b/dataset/UAV/dataset.py @@ -0,0 +1,116 @@ +import torch +from torch.utils.data import Dataset +import os +import cv2 +# @Time : 2023-02-13 22:56 +# @Author : Wang Zhen +# @Email : frozenzhencola@163.com +# @File : SatelliteTool.py +# @Project : TGRS_seqmatch_2023_1 +import numpy as np +import random +from utils.geo import BoundaryBox, Projection +from osm.tiling import TileManager,MapTileManager +from pathlib import Path +from torchvision import transforms +from torch.utils.data import DataLoader + +class UavMapPair(Dataset): + def __init__( + self, + root: Path, + city:str, + training:bool, + transform + ): + super().__init__() + + # self.root = root + + # city = 'Manhattan' + # root = '/root/DATASET/CrossModel/' + # root=Path(root) + self.uav_image_path = root/city/'uav' + self.map_path = root/city/'map' + self.map_vis = root / city / 'map_vis' + info_path = root / city / 'info.csv' + + self.info = np.loadtxt(str(info_path), dtype=str, delimiter=",", skiprows=1) + + self.transform=transform + self.training=training + + def random_center_crop(self,image): + height, width = image.shape[:2] + + # 随机生成剪裁尺寸 + crop_size = random.randint(min(height, width) // 2, min(height, width)) + + # 计算剪裁的起始坐标 + start_x = (width - crop_size) // 2 + start_y = (height - crop_size) // 2 + + # 进行剪裁 + cropped_image = image[start_y:start_y + crop_size, start_x:start_x + crop_size] + + return cropped_image + def __getitem__(self, index: int): + id, uav_name, map_name, \ + uav_long, uav_lat, \ + map_long, map_lat, \ + tile_size_meters, pixel_per_meter, \ + u, v, yaw,dis=self.info[index] + + + uav_image=cv2.imread(str(self.uav_image_path/uav_name)) + if self.training: + uav_image =self.random_center_crop(uav_image) + 
uav_image=cv2.cvtColor(uav_image,cv2.COLOR_BGR2RGB) + if self.transform: + uav_image=self.transform(uav_image) + map=np.load(str(self.map_path/map_name)) + + return { + 'map':torch.from_numpy(np.ascontiguousarray(map)).long(), + 'image':torch.tensor(uav_image), + 'roll_pitch_yaw':torch.tensor((0, 0, float(yaw))).float(), + 'pixels_per_meter':torch.tensor(float(pixel_per_meter)).float(), + "uv":torch.tensor([float(u), float(v)]).float(), + } + def __len__(self): + return len(self.info) +if __name__ == '__main__': + + root=Path('/root/DATASET/OrienterNet/UavMap/') + city='NewYork' + + transform = transforms.Compose([ + transforms.ToTensor(), + transforms.Resize(256), + transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)) + ]) + + dataset=UavMapPair( + root=root, + city=city, + transform=transform + ) + datasetloder = DataLoader(dataset, batch_size=3) + for batch, i in enumerate(datasetloder): + pass + # 将PyTorch张量转换为PIL图像 + # pil_image = Image.fromarray(i['uav_image'][0].permute(1, 2, 0).byte().numpy()) + + # 显示图像 + # 将PyTorch张量转换为NumPy数组 + # numpy_array = i['uav_image'][0].numpy() + # + # # 显示图像 + # plt.imshow(numpy_array.transpose(1, 2, 0)) + # plt.axis('off') + # plt.show() + # + # map_viz, label = Colormap.apply(i['map'][0]) + # map_viz = map_viz * 255 + # map_viz = map_viz.astype(np.uint8) + # plot_images([map_viz], titles=["OpenStreetMap raster"]) diff --git a/dataset/__init__.py b/dataset/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..3b51bfd2ed3eaf38719d8ee102df779d53d1ffa4 --- /dev/null +++ b/dataset/__init__.py @@ -0,0 +1,4 @@ +# from .UAV.dataset import UavMapPair +from .dataset import UavMapDatasetModule + +# modules = {"UAV": UavMapPair} diff --git a/dataset/dataset.py b/dataset/dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..2c6a21153a9bab1e7ee3bbbafa302b8ace5fc30c --- /dev/null +++ b/dataset/dataset.py @@ -0,0 +1,109 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. 
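The cfg consumed by UavMapDatasetModule below comes from the conf/*.yaml files above, which lean on OmegaConf interpolation: ${..train.batch_size} and ${.batch_size} are relative references, while ${data.image_size} and ${experiment.gpus} are absolute paths. A minimal sketch of how the data.val block resolves; the inline YAML is a trimmed, hypothetical stand-in, not one of the files above:

from omegaconf import OmegaConf

# Trimmed stand-in for the data block shared by the configs above.
yaml_snippet = """
data:
  train: {batch_size: 12, num_workers: 4}
  val:
    batch_size: ${..train.batch_size}  # two dots step up to data -> data.train.batch_size
    num_workers: ${.batch_size}        # one dot stays in val -> data.val.batch_size
"""
cfg = OmegaConf.create(yaml_snippet)
print(OmegaConf.to_container(cfg, resolve=True)["data"]["val"])
# -> {'batch_size': 12, 'num_workers': 12}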
+ +from copy import deepcopy +from pathlib import Path +from typing import Any, Dict, List +# from logger import logger +import numpy as np +# import torch +# import torch.utils.data as torchdata +# import torchvision.transforms as tvf +from omegaconf import DictConfig, OmegaConf +import pytorch_lightning as pl +from dataset.UAV.dataset import UavMapPair +# from torch.utils.data import Dataset, DataLoader +# from torchvision import transforms +from torch.utils.data import Dataset, ConcatDataset +from torch.utils.data import Dataset, DataLoader, random_split +import torchvision.transforms as tvf + +# 自定义数据模块类,继承自pl.LightningDataModule +class UavMapDatasetModule(pl.LightningDataModule): + + + def __init__(self, cfg: Dict[str, Any]): + super().__init__() + + # default_cfg = OmegaConf.create(self.default_cfg) + # OmegaConf.set_struct(default_cfg, True) # cannot add new keys + # self.cfg = OmegaConf.merge(default_cfg, cfg) + self.cfg=cfg + # self.transform = tvf.Compose([ + # tvf.ToTensor(), + # tvf.Resize(self.cfg.image_size), + # tvf.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)) + # ]) + + tfs = [] + tfs.append(tvf.ToTensor()) + tfs.append(tvf.Resize(self.cfg.image_size)) + self.val_tfs = tvf.Compose(tfs) + + # transforms.Resize(self.cfg.image_size), + if cfg.augmentation.image.apply: + args = OmegaConf.masked_copy( + cfg.augmentation.image, ["brightness", "contrast", "saturation", "hue"] + ) + tfs.append(tvf.ColorJitter(**args)) + self.train_tfs = tvf.Compose(tfs) + + # self.train_tfs=self.transform + # self.val_tfs = self.transform + self.init() + def init(self): + self.train_dataset = ConcatDataset([ + UavMapPair(root=Path(self.cfg.root),city=city,training=False,transform=self.train_tfs) + for city in self.cfg.train_citys + ]) + + self.val_dataset = ConcatDataset([ + UavMapPair(root=Path(self.cfg.root),city=city,training=False,transform=self.val_tfs) + for city in self.cfg.val_citys + ]) + self.test_dataset = ConcatDataset([ + UavMapPair(root=Path(self.cfg.root),city=city,training=False,transform=self.val_tfs) + for city in self.cfg.test_citys + ]) + + # self.val_datasets = { + # city:UavMapPair(root=Path(self.cfg.root),city=city,transform=self.val_tfs) + # for city in self.cfg.val_citys + # } + # logger.info("train data len:{},val data len:{}".format(len(self.train_dataset),len(self.val_dataset))) + # # 定义分割比例 + # train_ratio = 0.8 # 训练集比例 + # # 计算分割的样本数量 + # train_size = int(len(self.dataset) * train_ratio) + # val_size = len(self.dataset) - train_size + # self.train_dataset, self.val_dataset = random_split(self.dataset, [train_size, val_size]) + def train_dataloader(self): + train_loader = DataLoader(self.train_dataset, + batch_size=self.cfg.train.batch_size, + num_workers=self.cfg.train.num_workers, + shuffle=True,pin_memory = True) + return train_loader + + def val_dataloader(self): + val_loader = DataLoader(self.val_dataset, + batch_size=self.cfg.val.batch_size, + num_workers=self.cfg.val.num_workers, + shuffle=True,pin_memory = True) + # + # my_dict = {k: v for k, v in self.val_datasets} + # val_loaders={city: DataLoader(dataset, + # batch_size=self.cfg.val.batch_size, + # num_workers=self.cfg.val.num_workers, + # shuffle=False,pin_memory = True) for city, dataset in self.val_datasets.items()} + return val_loader + def test_dataloader(self): + val_loader = DataLoader(self.test_dataset, + batch_size=self.cfg.val.batch_size, + num_workers=self.cfg.val.num_workers, + shuffle=True,pin_memory = True) + # + # my_dict = {k: v for k, v in self.val_datasets} + # val_loaders={city: 
DataLoader(dataset, + # batch_size=self.cfg.val.batch_size, + # num_workers=self.cfg.val.num_workers, + # shuffle=False,pin_memory = True) for city, dataset in self.val_datasets.items()} + return val_loader \ No newline at end of file diff --git a/dataset/image.py b/dataset/image.py new file mode 100644 index 0000000000000000000000000000000000000000..75b3dc68cc2481150c5ff938483ae640956bcf0d --- /dev/null +++ b/dataset/image.py @@ -0,0 +1,140 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. + +from typing import Callable, Optional, Union, Sequence + +import numpy as np +import torch +import torchvision.transforms.functional as tvf +import collections +from scipy.spatial.transform import Rotation + +from utils.geometry import from_homogeneous, to_homogeneous +from utils.wrappers import Camera + + +def rectify_image( + image: torch.Tensor, + cam: Camera, + roll: float, + pitch: Optional[float] = None, + valid: Optional[torch.Tensor] = None, +): + *_, h, w = image.shape + grid = torch.meshgrid( + [torch.arange(w, device=image.device), torch.arange(h, device=image.device)], + indexing="xy", + ) + grid = torch.stack(grid, -1).to(image.dtype) + + if pitch is not None: + args = ("ZX", (roll, pitch)) + else: + args = ("Z", roll) + R = Rotation.from_euler(*args, degrees=True).as_matrix() + R = torch.from_numpy(R).to(image) + + grid_rect = to_homogeneous(cam.normalize(grid)) @ R.T + grid_rect = cam.denormalize(from_homogeneous(grid_rect)) + grid_norm = (grid_rect + 0.5) / grid.new_tensor([w, h]) * 2 - 1 + rectified = torch.nn.functional.grid_sample( + image[None], + grid_norm[None], + align_corners=False, + mode="bilinear", + ).squeeze(0) + if valid is None: + valid = torch.all((grid_norm >= -1) & (grid_norm <= 1), -1) + else: + valid = ( + torch.nn.functional.grid_sample( + valid[None, None].float(), + grid_norm[None], + align_corners=False, + mode="nearest", + )[0, 0] + > 0 + ) + return rectified, valid + + +def resize_image( + image: torch.Tensor, + size: Union[int, Sequence, np.ndarray], + fn: Optional[Callable] = None, + camera: Optional[Camera] = None, + valid: np.ndarray = None, +): + """Resize an image to a fixed size, or according to max or min edge.""" + *_, h, w = image.shape + if fn is not None: + assert isinstance(size, int) + scale = size / fn(h, w) + h_new, w_new = int(round(h * scale)), int(round(w * scale)) + scale = (scale, scale) + else: + if isinstance(size, (collections.abc.Sequence, np.ndarray)): + w_new, h_new = size + elif isinstance(size, int): + w_new = h_new = size + else: + raise ValueError(f"Incorrect new size: {size}") + scale = (w_new / w, h_new / h) + if (w, h) != (w_new, h_new): + mode = tvf.InterpolationMode.BILINEAR + image = tvf.resize(image, (h_new, w_new), interpolation=mode, antialias=True) + image.clip_(0, 1) + if camera is not None: + camera = camera.scale(scale) + if valid is not None: + valid = tvf.resize( + valid.unsqueeze(0), + (h_new, w_new), + interpolation=tvf.InterpolationMode.NEAREST, + ).squeeze(0) + ret = [image, scale] + if camera is not None: + ret.append(camera) + if valid is not None: + ret.append(valid) + return ret + + +def pad_image( + image: torch.Tensor, + size: Union[int, Sequence, np.ndarray], + camera: Optional[Camera] = None, + valid: torch.Tensor = None, + crop_and_center: bool = False, +): + if isinstance(size, int): + w_new = h_new = size + elif isinstance(size, (collections.abc.Sequence, np.ndarray)): + w_new, h_new = size + else: + raise ValueError(f"Incorrect new size: {size}") + *c, h, w = image.shape + if 
crop_and_center: + diff = np.array([w - w_new, h - h_new]) + left, top = left_top = np.round(diff / 2).astype(int) + right, bottom = diff - left_top + else: + assert h <= h_new + assert w <= w_new + top = bottom = left = right = 0 + slice_out = np.s_[..., : min(h, h_new), : min(w, w_new)] + slice_in = np.s_[ + ..., max(top, 0) : h - max(bottom, 0), max(left, 0) : w - max(right, 0) + ] + if (w, h) == (w_new, h_new): + out = image + else: + out = torch.zeros((*c, h_new, w_new), dtype=image.dtype) + out[slice_out] = image[slice_in] + if camera is not None: + camera = camera.crop((max(left, 0), max(top, 0)), (w_new, h_new)) + out_valid = torch.zeros((h_new, w_new), dtype=torch.bool) + out_valid[slice_out] = True if valid is None else valid[slice_in] + if camera is not None: + return out, out_valid, camera + else: + return out, out_valid diff --git a/dataset/torch.py b/dataset/torch.py new file mode 100644 index 0000000000000000000000000000000000000000..9547ca149c606a345e5b8916591e43c26031022c --- /dev/null +++ b/dataset/torch.py @@ -0,0 +1,111 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. + +import collections +import os + +import torch +from torch.utils.data import get_worker_info +from torch.utils.data._utils.collate import ( + default_collate_err_msg_format, + np_str_obj_array_pattern, +) +from lightning_fabric.utilities.seed import pl_worker_init_function +from lightning_utilities.core.apply_func import apply_to_collection +from lightning_fabric.utilities.apply_func import move_data_to_device + + +def collate(batch): + """Difference with PyTorch default_collate: it can stack other tensor-like objects. + Adapted from PixLoc, Paul-Edouard Sarlin, ETH Zurich + https://github.com/cvg/pixloc + Released under the Apache License 2.0 + """ + if not isinstance(batch, list): # no batching + return batch + elem = batch[0] + elem_type = type(elem) + if isinstance(elem, torch.Tensor): + out = None + if torch.utils.data.get_worker_info() is not None: + # If we're in a background process, concatenate directly into a + # shared memory tensor to avoid an extra copy + numel = sum(x.numel() for x in batch) + storage = elem.storage()._new_shared(numel, device=elem.device) + out = elem.new(storage).resize_(len(batch), *list(elem.size())) + return torch.stack(batch, 0, out=out) + elif ( + elem_type.__module__ == "numpy" + and elem_type.__name__ != "str_" + and elem_type.__name__ != "string_" + ): + if elem_type.__name__ == "ndarray" or elem_type.__name__ == "memmap": + # array of string classes and object + if np_str_obj_array_pattern.search(elem.dtype.str) is not None: + raise TypeError(default_collate_err_msg_format.format(elem.dtype)) + + return collate([torch.as_tensor(b) for b in batch]) + elif elem.shape == (): # scalars + return torch.as_tensor(batch) + elif isinstance(elem, float): + return torch.tensor(batch, dtype=torch.float64) + elif isinstance(elem, int): + return torch.tensor(batch) + elif isinstance(elem, (str, bytes)): + return batch + elif isinstance(elem, collections.abc.Mapping): + return {key: collate([d[key] for d in batch]) for key in elem} + elif isinstance(elem, tuple) and hasattr(elem, "_fields"): # namedtuple + return elem_type(*(collate(samples) for samples in zip(*batch))) + elif isinstance(elem, collections.abc.Sequence): + # check to make sure that the elements in batch have consistent size + it = iter(batch) + elem_size = len(next(it)) + if not all(len(elem) == elem_size for elem in it): + raise RuntimeError("each element in list of batch should be of equal size") + 
transposed = zip(*batch) + return [collate(samples) for samples in transposed] + else: + # try to stack anyway in case the object implements stacking. + try: + return torch.stack(batch, 0) + except TypeError as e: + if "expected Tensor as element" in str(e): + return batch + else: + raise e + + +def set_num_threads(nt): + """Force numpy and other libraries to use a limited number of threads.""" + try: + import mkl + except ImportError: + pass + else: + mkl.set_num_threads(nt) + torch.set_num_threads(1) + os.environ["IPC_ENABLE"] = "1" + for o in [ + "OPENBLAS_NUM_THREADS", + "NUMEXPR_NUM_THREADS", + "OMP_NUM_THREADS", + "MKL_NUM_THREADS", + ]: + os.environ[o] = str(nt) + + +def worker_init_fn(i): + info = get_worker_info() + pl_worker_init_function(info.id) + num_threads = info.dataset.cfg.get("num_threads") + if num_threads is not None: + set_num_threads(num_threads) + + +def unbatch_to_device(data, device="cpu"): + data = move_data_to_device(data, device) + data = apply_to_collection(data, torch.Tensor, lambda x: x.squeeze(0)) + data = apply_to_collection( + data, list, lambda x: x[0] if len(x) == 1 and isinstance(x[0], str) else x + ) + return data diff --git a/evaluation/kitti.py b/evaluation/kitti.py new file mode 100644 index 0000000000000000000000000000000000000000..e91da069f307a533b0471a3fb43f8622cadc60db --- /dev/null +++ b/evaluation/kitti.py @@ -0,0 +1,89 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. + +import argparse +from pathlib import Path +from typing import Optional, Tuple + +from omegaconf import OmegaConf, DictConfig + +from .. import logger +from ..data import KittiDataModule +from .run import evaluate + + +default_cfg_single = OmegaConf.create({}) +# For the sequential evaluation, we need to center the map around the GT location, +# since random offsets would accumulate and leave only the GT location with a valid mask. +# This should not have much impact on the results. +default_cfg_sequential = OmegaConf.create( + { + "data": { + "mask_radius": KittiDataModule.default_cfg["max_init_error"], + "prior_range_rotation": KittiDataModule.default_cfg[ + "max_init_error_rotation" + ] + + 1, + "max_init_error": 0, + "max_init_error_rotation": 0, + }, + "chunking": { + "max_length": 100, # about 10s? 
+ }, + } +) + + +def run( + split: str, + experiment: str, + cfg: Optional[DictConfig] = None, + sequential: bool = False, + thresholds: Tuple[int] = (1, 3, 5), + **kwargs, +): + cfg = cfg or {} + if isinstance(cfg, dict): + cfg = OmegaConf.create(cfg) + default = default_cfg_sequential if sequential else default_cfg_single + cfg = OmegaConf.merge(default, cfg) + dataset = KittiDataModule(cfg.get("data", {})) + + metrics = evaluate( + experiment, + cfg, + dataset, + split=split, + sequential=sequential, + viz_kwargs=dict(show_dir_error=True, show_masked_prob=False), + **kwargs, + ) + + keys = ["directional_error", "yaw_max_error"] + if sequential: + keys += ["directional_seq_error", "yaw_seq_error"] + for k in keys: + rec = metrics[k].recall(thresholds).double().numpy().round(2).tolist() + logger.info("Recall %s: %s at %s m/°", k, rec, thresholds) + return metrics + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--experiment", type=str, required=True) + parser.add_argument( + "--split", type=str, default="test", choices=["test", "val", "train"] + ) + parser.add_argument("--sequential", action="store_true") + parser.add_argument("--output_dir", type=Path) + parser.add_argument("--num", type=int) + parser.add_argument("dotlist", nargs="*") + args = parser.parse_args() + cfg = OmegaConf.from_cli(args.dotlist) + run( + args.split, + args.experiment, + cfg, + args.sequential, + output_dir=args.output_dir, + num=args.num, + ) diff --git a/evaluation/mapillary.py b/evaluation/mapillary.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/evaluation/run.py b/evaluation/run.py new file mode 100644 index 0000000000000000000000000000000000000000..6e0697ecd367f7b2d9c14ba164914ee7a7812f71 --- /dev/null +++ b/evaluation/run.py @@ -0,0 +1,252 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. 
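The argparse block in evaluation/kitti.py above forwards any trailing dotlist arguments through OmegaConf.from_cli, so config overrides can be passed straight on the command line; programmatically, the same entry point is just the run() function. A sketch of such a call, assuming kitti.py's relative imports resolve (i.e. the evaluation package sits inside a top-level package, here hypothetically called maploc) and using the OrienterNet_MGL alias registered in run.py below:

from omegaconf import OmegaConf
from maploc.evaluation.kitti import run  # "maploc" is a placeholder for the actual top-level package

# Single-image evaluation on the KITTI test split with one config override;
# extra keyword arguments such as num are forwarded to evaluate_single_image.
metrics = run(
    split="test",
    experiment="OrienterNet_MGL",  # pretrained alias resolved by resolve_checkpoint_path in run.py
    cfg=OmegaConf.create({"data": {"max_init_error": 48}}),
    sequential=False,
    num=100,
)
print(metrics["directional_error"].recall((1, 3, 5)))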
+ +import functools +from itertools import islice +from typing import Callable, Dict, Optional, Tuple +from pathlib import Path + +import numpy as np +import torch +from omegaconf import DictConfig, OmegaConf +from torchmetrics import MetricCollection +from pytorch_lightning import seed_everything +from tqdm import tqdm + +from logger import logger, EXPERIMENTS_PATH +from dataset.torch import collate, unbatch_to_device +from models.voting import argmax_xyr, fuse_gps +from models.metrics import AngleError, LateralLongitudinalError, Location2DError +from models.sequential import GPSAligner, RigidAligner +from module import GenericModule +from utils.io import download_file, DATA_URL +from evaluation.viz import plot_example_single, plot_example_sequential +from evaluation.utils import write_dump + + +pretrained_models = dict( + OrienterNet_MGL=("orienternet_mgl.ckpt", dict(num_rotations=256)), +) + + +def resolve_checkpoint_path(experiment_or_path: str) -> Path: + path = Path(experiment_or_path) + if not path.exists(): + # provided name of experiment + path = Path(EXPERIMENTS_PATH, *experiment_or_path.split("/")) + if not path.exists(): + if experiment_or_path in set(p for p, _ in pretrained_models.values()): + download_file(f"{DATA_URL}/{experiment_or_path}", path) + else: + raise FileNotFoundError(path) + if path.is_file(): + return path + # provided only the experiment name + maybe_path = path / "last-step-v1.ckpt" + if not maybe_path.exists(): + maybe_path = path / "last.ckpt" + if not maybe_path.exists(): + raise FileNotFoundError(f"Could not find any checkpoint in {path}.") + return maybe_path + + +@torch.no_grad() +def evaluate_single_image( + dataloader: torch.utils.data.DataLoader, + model: GenericModule, + num: Optional[int] = None, + callback: Optional[Callable] = None, + progress: bool = True, + mask_index: Optional[Tuple[int]] = None, + has_gps: bool = False, +): + ppm = model.model.conf.pixel_per_meter + metrics = MetricCollection(model.model.metrics()) + metrics["directional_error"] = LateralLongitudinalError(ppm) + if has_gps: + metrics["xy_gps_error"] = Location2DError("uv_gps", ppm) + metrics["xy_fused_error"] = Location2DError("uv_fused", ppm) + metrics["yaw_fused_error"] = AngleError("yaw_fused") + metrics = metrics.to(model.device) + + for i, batch_ in enumerate( + islice(tqdm(dataloader, total=num, disable=not progress), num) + ): + batch = model.transfer_batch_to_device(batch_, model.device, i) + # Ablation: mask semantic classes + if mask_index is not None: + mask = batch["map"][0, mask_index[0]] == (mask_index[1] + 1) + batch["map"][0, mask_index[0]][mask] = 0 + pred = model(batch) + + if has_gps: + (uv_gps,) = pred["uv_gps"] = batch["uv_gps"] + pred["log_probs_fused"] = fuse_gps( + pred["log_probs"], uv_gps, ppm, sigma=batch["accuracy_gps"] + ) + uvt_fused = argmax_xyr(pred["log_probs_fused"]) + pred["uv_fused"] = uvt_fused[..., :2] + pred["yaw_fused"] = uvt_fused[..., -1] + del uv_gps, uvt_fused + + results = metrics(pred, batch) + if callback is not None: + callback( + i, model, unbatch_to_device(pred), unbatch_to_device(batch_), results + ) + del batch_, batch, pred, results + + return metrics.cpu() + + +@torch.no_grad() +def evaluate_sequential( + dataset: torch.utils.data.Dataset, + chunk2idx: Dict, + model: GenericModule, + num: Optional[int] = None, + shuffle: bool = False, + callback: Optional[Callable] = None, + progress: bool = True, + num_rotations: int = 512, + mask_index: Optional[Tuple[int]] = None, + has_gps: bool = True, +): + chunk_keys = 
list(chunk2idx) + if shuffle: + chunk_keys = [chunk_keys[i] for i in torch.randperm(len(chunk_keys))] + if num is not None: + chunk_keys = chunk_keys[:num] + lengths = [len(chunk2idx[k]) for k in chunk_keys] + logger.info( + "Min/max/med lengths: %d/%d/%d, total number of images: %d", + min(lengths), + np.median(lengths), + max(lengths), + sum(lengths), + ) + viz = callback is not None + + metrics = MetricCollection(model.model.metrics()) + ppm = model.model.conf.pixel_per_meter + metrics["directional_error"] = LateralLongitudinalError(ppm) + metrics["xy_seq_error"] = Location2DError("uv_seq", ppm) + metrics["yaw_seq_error"] = AngleError("yaw_seq") + metrics["directional_seq_error"] = LateralLongitudinalError(ppm, key="uv_seq") + if has_gps: + metrics["xy_gps_error"] = Location2DError("uv_gps", ppm) + metrics["xy_gps_seq_error"] = Location2DError("uv_gps_seq", ppm) + metrics["yaw_gps_seq_error"] = AngleError("yaw_gps_seq") + metrics = metrics.to(model.device) + + keys_save = ["uvr_max", "uv_max", "yaw_max", "uv_expectation"] + if has_gps: + keys_save.append("uv_gps") + if viz: + keys_save.append("log_probs") + + for chunk_index, key in enumerate(tqdm(chunk_keys, disable=not progress)): + indices = chunk2idx[key] + aligner = RigidAligner(track_priors=viz, num_rotations=num_rotations) + if has_gps: + aligner_gps = GPSAligner(track_priors=viz, num_rotations=num_rotations) + batches = [] + preds = [] + for i in indices: + data = dataset[i] + data = model.transfer_batch_to_device(data, model.device, 0) + pred = model(collate([data])) + + canvas = data["canvas"] + data["xy_geo"] = xy = canvas.to_xy(data["uv"].double()) + data["yaw"] = yaw = data["roll_pitch_yaw"][-1].double() + aligner.update(pred["log_probs"][0], canvas, xy, yaw) + + if has_gps: + (uv_gps) = pred["uv_gps"] = data["uv_gps"][None] + xy_gps = canvas.to_xy(uv_gps.double()) + aligner_gps.update(xy_gps, data["accuracy_gps"], canvas, xy, yaw) + + if not viz: + data.pop("image") + data.pop("map") + batches.append(data) + preds.append({k: pred[k][0] for k in keys_save}) + del pred + + xy_gt = torch.stack([b["xy_geo"] for b in batches]) + yaw_gt = torch.stack([b["yaw"] for b in batches]) + aligner.compute() + xy_seq, yaw_seq = aligner.transform(xy_gt, yaw_gt) + if has_gps: + aligner_gps.compute() + xy_gps_seq, yaw_gps_seq = aligner_gps.transform(xy_gt, yaw_gt) + results = [] + for i in range(len(indices)): + preds[i]["uv_seq"] = batches[i]["canvas"].to_uv(xy_seq[i]).float() + preds[i]["yaw_seq"] = yaw_seq[i].float() + if has_gps: + preds[i]["uv_gps_seq"] = ( + batches[i]["canvas"].to_uv(xy_gps_seq[i]).float() + ) + preds[i]["yaw_gps_seq"] = yaw_gps_seq[i].float() + results.append(metrics(preds[i], batches[i])) + if viz: + callback(chunk_index, model, batches, preds, results, aligner) + del aligner, preds, batches, results + return metrics.cpu() + + +def evaluate( + experiment: str, + cfg: DictConfig, + dataset, + split: str, + sequential: bool = False, + output_dir: Optional[Path] = None, + callback: Optional[Callable] = None, + num_workers: int = 1, + viz_kwargs=None, + **kwargs, +): + if experiment in pretrained_models: + experiment, cfg_override = pretrained_models[experiment] + cfg = OmegaConf.merge(OmegaConf.create(dict(model=cfg_override)), cfg) + + logger.info("Evaluating model %s with config %s", experiment, cfg) + checkpoint_path = resolve_checkpoint_path(experiment) + model = GenericModule.load_from_checkpoint( + checkpoint_path, cfg=cfg, find_best=not experiment.endswith(".ckpt") + ) + model = model.eval() + if 
torch.cuda.is_available(): + model = model.cuda() + + dataset.prepare_data() + dataset.setup() + + if output_dir is not None: + output_dir.mkdir(exist_ok=True, parents=True) + if callback is None: + if sequential: + callback = plot_example_sequential + else: + callback = plot_example_single + callback = functools.partial( + callback, out_dir=output_dir, **(viz_kwargs or {}) + ) + kwargs = {**kwargs, "callback": callback} + + seed_everything(dataset.cfg.seed) + if sequential: + dset, chunk2idx = dataset.sequence_dataset(split, **cfg.chunking) + metrics = evaluate_sequential(dset, chunk2idx, model, **kwargs) + else: + loader = dataset.dataloader(split, shuffle=True, num_workers=num_workers) + metrics = evaluate_single_image(loader, model, **kwargs) + + results = metrics.compute() + logger.info("All results: %s", results) + if output_dir is not None: + write_dump(output_dir, experiment, cfg, results, metrics) + logger.info("Outputs have been written to %s.", output_dir) + return metrics diff --git a/evaluation/utils.py b/evaluation/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..6cc954ed20557c351965cad89aa2e249986986ee --- /dev/null +++ b/evaluation/utils.py @@ -0,0 +1,40 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. + +import numpy as np +from omegaconf import OmegaConf + +from utils.io import write_json + + +def compute_recall(errors): + num_elements = len(errors) + sort_idx = np.argsort(errors) + errors = np.array(errors.copy())[sort_idx] + recall = (np.arange(num_elements) + 1) / num_elements + recall = np.r_[0, recall] + errors = np.r_[0, errors] + return errors, recall + + +def compute_auc(errors, recall, thresholds): + aucs = [] + for t in thresholds: + last_index = np.searchsorted(errors, t, side="right") + r = np.r_[recall[:last_index], recall[last_index - 1]] + e = np.r_[errors[:last_index], t] + auc = np.trapz(r, x=e) / t + aucs.append(auc * 100) + return aucs + + +def write_dump(output_dir, experiment, cfg, results, metrics): + dump = { + "experiment": experiment, + "cfg": OmegaConf.to_container(cfg), + "results": results, + "errors": {}, + } + for k, m in metrics.items(): + if hasattr(m, "get_errors"): + dump["errors"][k] = m.get_errors().numpy() + write_json(output_dir / "log.json", dump) diff --git a/evaluation/viz.py b/evaluation/viz.py new file mode 100644 index 0000000000000000000000000000000000000000..9cd9f7dfd0f2103f2ebdda8cfe8022ad5a2e719b --- /dev/null +++ b/evaluation/viz.py @@ -0,0 +1,178 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. 
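
The recall/AUC helpers added in `evaluation/utils.py` above are easiest to see with a small end-to-end sketch. The snippet below is an editor's illustration, not part of the diff: the error values are invented, and it assumes only the two functions defined above (`compute_recall`, `compute_auc`) plus NumPy.

```python
import numpy as np

from evaluation.utils import compute_recall, compute_auc

# Hypothetical per-image localization errors in meters.
xy_errors = np.array([0.8, 1.7, 2.3, 4.9, 7.5, 12.0, 45.0])

# Sort the errors and build the cumulative recall curve (both arrays are
# prefixed with 0 by compute_recall).
errors, recall = compute_recall(xy_errors)

# Normalized area under the recall-vs-error curve up to each threshold, in percent.
thresholds = [1.0, 3.0, 5.0]
aucs = compute_auc(errors, recall, thresholds)
print(dict(zip(thresholds, (round(a, 1) for a in aucs))))
```

This is presumably how the per-metric error arrays dumped by `write_dump` are meant to be summarized into recall-AUC numbers; the thresholds would be chosen per metric (meters for position, degrees for yaw).
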
+ +import numpy as np +import torch +import matplotlib.pyplot as plt + +from utils.io import write_torch_image +from utils.viz_2d import plot_images, features_to_RGB, save_plot +from utils.viz_localization import ( + likelihood_overlay, + plot_pose, + plot_dense_rotations, + add_circle_inset, +) +from osm.viz import Colormap, plot_nodes + + +def plot_example_single( + idx, + model, + pred, + data, + results, + plot_bev=True, + out_dir=None, + fig_for_paper=False, + show_gps=False, + show_fused=False, + show_dir_error=False, + show_masked_prob=False, +): + scene, name, rasters, uv_gt = (data[k] for k in ("scene", "name", "map", "uv")) + uv_gps = data.get("uv_gps") + yaw_gt = data["roll_pitch_yaw"][-1].numpy() + image = data["image"].permute(1, 2, 0) + if "valid" in data: + image = image.masked_fill(~data["valid"].unsqueeze(-1), 0.3) + + lp_uvt = lp_uv = pred["log_probs"] + if show_fused and "log_probs_fused" in pred: + lp_uvt = lp_uv = pred["log_probs_fused"] + elif not show_masked_prob and "scores_unmasked" in pred: + lp_uvt = lp_uv = pred["scores_unmasked"] + has_rotation = lp_uvt.ndim == 3 + if has_rotation: + lp_uv = lp_uvt.max(-1).values + if lp_uv.min() > -np.inf: + lp_uv = lp_uv.clip(min=np.percentile(lp_uv, 1)) + prob = lp_uv.exp() + uv_p, yaw_p = pred["uv_max"], pred.get("yaw_max") + if show_fused and "uv_fused" in pred: + uv_p, yaw_p = pred["uv_fused"], pred.get("yaw_fused") + feats_map = pred["map"]["map_features"][0] + (feats_map_rgb,) = features_to_RGB(feats_map.numpy()) + + text1 = rf'$\Delta xy$: {results["xy_max_error"]:.1f}m' + if has_rotation: + text1 += rf', $\Delta\theta$: {results["yaw_max_error"]:.1f}°' + if show_fused and "xy_fused_error" in results: + text1 += rf', $\Delta xy_{{fused}}$: {results["xy_fused_error"]:.1f}m' + text1 += rf', $\Delta\theta_{{fused}}$: {results["yaw_fused_error"]:.1f}°' + if show_dir_error and "directional_error" in results: + err_lat, err_lon = results["directional_error"] + text1 += rf", $\Delta$lateral/longitundinal={err_lat:.1f}m/{err_lon:.1f}m" + if "xy_gps_error" in results: + text1 += rf', $\Delta xy_{{GPS}}$: {results["xy_gps_error"]:.1f}m' + + map_viz = Colormap.apply(rasters) + overlay = likelihood_overlay(prob.numpy(), map_viz.mean(-1, keepdims=True)) + plot_images( + [image, map_viz, overlay, feats_map_rgb], + titles=[text1, "map", "likelihood", "neural map"], + dpi=75, + cmaps="jet", + ) + fig = plt.gcf() + axes = fig.axes + axes[1].images[0].set_interpolation("none") + axes[2].images[0].set_interpolation("none") + Colormap.add_colorbar() + plot_nodes(1, rasters[2]) + + if show_gps and uv_gps is not None: + plot_pose([1], uv_gps, c="blue") + plot_pose([1], uv_gt, yaw_gt, c="red") + plot_pose([1], uv_p, yaw_p, c="k") + plot_dense_rotations(2, lp_uvt.exp()) + inset_center = pred["uv_max"] if results["xy_max_error"] < 5 else uv_gt + axins = add_circle_inset(axes[2], inset_center) + axins.scatter(*uv_gt, lw=1, c="red", ec="k", s=50, zorder=15) + axes[0].text( + 0.003, + 0.003, + f"{scene}/{name}", + transform=axes[0].transAxes, + fontsize=3, + va="bottom", + ha="left", + color="w", + ) + plt.show() + if out_dir is not None: + name_ = name.replace("/", "_") + p = str(out_dir / f"{scene}_{name_}_{{}}.pdf") + save_plot(p.format("pred")) + plt.close() + + if fig_for_paper: + # !cp ../datasets/MGL/{scene}/images/{name}.jpg {out_dir}/{scene}_{name}.jpg + plot_images([map_viz]) + plt.gca().images[0].set_interpolation("none") + plot_nodes(0, rasters[2]) + plot_pose([0], uv_gt, yaw_gt, c="red") + plot_pose([0], pred["uv_max"], 
pred["yaw_max"], c="k") + save_plot(p.format("map")) + plt.close() + plot_images([lp_uv], cmaps="jet") + plot_dense_rotations(0, lp_uvt.exp()) + save_plot(p.format("loglikelihood"), dpi=100) + plt.close() + plot_images([overlay]) + plt.gca().images[0].set_interpolation("none") + axins = add_circle_inset(plt.gca(), inset_center) + axins.scatter(*uv_gt, lw=1, c="red", ec="k", s=50) + save_plot(p.format("likelihood")) + plt.close() + write_torch_image( + p.format("neuralmap").replace("pdf", "jpg"), feats_map_rgb + ) + write_torch_image(p.format("image").replace("pdf", "jpg"), image.numpy()) + + if not plot_bev: + return + + feats_q = pred["features_bev"] + mask_bev = pred["valid_bev"] + prior = None + if "log_prior" in pred["map"]: + prior = pred["map"]["log_prior"][0].sigmoid() + if "bev" in pred and "confidence" in pred["bev"]: + conf_q = pred["bev"]["confidence"] + else: + conf_q = torch.norm(feats_q, dim=0) + conf_q = conf_q.masked_fill(~mask_bev, np.nan) + (feats_q_rgb,) = features_to_RGB(feats_q.numpy(), masks=[mask_bev.numpy()]) + # feats_map_rgb, feats_q_rgb, = features_to_RGB( + # feats_map.numpy(), feats_q.numpy(), masks=[None, mask_bev]) + norm_map = torch.norm(feats_map, dim=0) + + plot_images( + [conf_q, feats_q_rgb, norm_map] + ([] if prior is None else [prior]), + titles=["BEV confidence", "BEV features", "map norm"] + + ([] if prior is None else ["map prior"]), + dpi=50, + cmaps="jet", + ) + plt.show() + + if out_dir is not None: + save_plot(p.format("bev")) + plt.close() + + +def plot_example_sequential( + idx, + model, + pred, + data, + results, + plot_bev=True, + out_dir=None, + fig_for_paper=False, + show_gps=False, + show_fused=False, + show_dir_error=False, + show_masked_prob=False, +): + return diff --git a/feature_extractor_models/__init__.py b/feature_extractor_models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..3420f4a51d4d7684b94d5782a97f2902b6c20897 --- /dev/null +++ b/feature_extractor_models/__init__.py @@ -0,0 +1,82 @@ + +from . import encoders +from . import decoders + + +from .decoders.unet import Unet +from .decoders.unetplusplus import UnetPlusPlus +from .decoders.manet import MAnet +from .decoders.linknet import Linknet +from .decoders.fpn import FPN +from .decoders.lightfpn import LightFPN +from .decoders.pspnet import PSPNet +from .decoders.deeplabv3 import DeepLabV3, DeepLabV3Plus +from .decoders.pan import PAN +from .base.hub_mixin import from_pretrained + +from .__version__ import __version__ + +# some private imports for create_model function +from typing import Optional as _Optional +import torch as _torch + + +def create_model( + arch: str, + encoder_name: str = "resnet34", + encoder_weights: _Optional[str] = "imagenet", + in_channels: int = 3, + classes: int = 1, + **kwargs, +) -> _torch.nn.Module: + """Models entrypoint, allows to create any model architecture just with + parameters, without using its class + """ + + archs = [ + Unet, + UnetPlusPlus, + MAnet, + Linknet, + FPN, + LightFPN, + PSPNet, + DeepLabV3, + DeepLabV3Plus, + PAN, + ] + archs_dict = {a.__name__.lower(): a for a in archs} + try: + model_class = archs_dict[arch.lower()] + except KeyError: + raise KeyError( + "Wrong architecture type `{}`. 
Available options are: {}".format( + arch, list(archs_dict.keys()) + ) + ) + return model_class( + encoder_name=encoder_name, + encoder_weights=encoder_weights, + in_channels=in_channels, + classes=classes, + **kwargs, + ) + + +__all__ = [ + "encoders", + "decoders", + "Unet", + "UnetPlusPlus", + "MAnet", + "Linknet", + "FPN", + "LightFPN", + "PSPNet", + "DeepLabV3", + "DeepLabV3Plus", + "PAN", + "from_pretrained", + "create_model", + "__version__", +] diff --git a/feature_extractor_models/__version__.py b/feature_extractor_models/__version__.py new file mode 100644 index 0000000000000000000000000000000000000000..8ced1f115c071a33685f437ee864588488616059 --- /dev/null +++ b/feature_extractor_models/__version__.py @@ -0,0 +1,3 @@ +VERSION = (0, 3, "4dev0") + +__version__ = ".".join(map(str, VERSION)) diff --git a/feature_extractor_models/base/__init__.py b/feature_extractor_models/base/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..0df076d9802cc9de24092a20b729f53042285459 --- /dev/null +++ b/feature_extractor_models/base/__init__.py @@ -0,0 +1,13 @@ +from .model import SegmentationModel + +from .modules import Conv2dReLU, Attention + +from .heads import SegmentationHead, ClassificationHead + +__all__ = [ + "SegmentationModel", + "Conv2dReLU", + "Attention", + "SegmentationHead", + "ClassificationHead", +] diff --git a/feature_extractor_models/base/heads.py b/feature_extractor_models/base/heads.py new file mode 100644 index 0000000000000000000000000000000000000000..fbc939cad427fb6b9edfb02c0e841a68de84a03a --- /dev/null +++ b/feature_extractor_models/base/heads.py @@ -0,0 +1,34 @@ +import torch.nn as nn +from .modules import Activation + + +class SegmentationHead(nn.Sequential): + def __init__( + self, in_channels, out_channels, kernel_size=3, activation=None, upsampling=1 + ): + conv2d = nn.Conv2d( + in_channels, out_channels, kernel_size=kernel_size, padding=kernel_size // 2 + ) + upsampling = ( + nn.UpsamplingBilinear2d(scale_factor=upsampling) + if upsampling > 1 + else nn.Identity() + ) + activation = Activation(activation) + super().__init__(conv2d, upsampling, activation) + + +class ClassificationHead(nn.Sequential): + def __init__( + self, in_channels, classes, pooling="avg", dropout=0.2, activation=None + ): + if pooling not in ("max", "avg"): + raise ValueError( + "Pooling should be one of ('max', 'avg'), got {}.".format(pooling) + ) + pool = nn.AdaptiveAvgPool2d(1) if pooling == "avg" else nn.AdaptiveMaxPool2d(1) + flatten = nn.Flatten() + dropout = nn.Dropout(p=dropout, inplace=True) if dropout else nn.Identity() + linear = nn.Linear(in_channels, classes, bias=True) + activation = Activation(activation) + super().__init__(pool, flatten, dropout, linear, activation) diff --git a/feature_extractor_models/base/hub_mixin.py b/feature_extractor_models/base/hub_mixin.py new file mode 100644 index 0000000000000000000000000000000000000000..2f1056ea091c9dc3b774d8381d72d5fdb9eb74ea --- /dev/null +++ b/feature_extractor_models/base/hub_mixin.py @@ -0,0 +1,154 @@ +import json +from pathlib import Path +from typing import Optional, Union +from functools import wraps +from huggingface_hub import ( + PyTorchModelHubMixin, + ModelCard, + ModelCardData, + hf_hub_download, +) + + +MODEL_CARD = """ +--- +{{ card_data }} +--- +# {{ model_name }} Model Card + +Table of Contents: +- [Load trained model](#load-trained-model) +- [Model init parameters](#model-init-parameters) +- [Model metrics](#model-metrics) +- [Dataset](#dataset) + +## Load trained model +```python 
+import feature_extractor_models as smp + +model = smp.{{ model_name }}.from_pretrained("{{ save_directory | default("", true)}}") +``` + +## Model init parameters +```python +model_init_params = {{ model_parameters }} +``` + +## Model metrics +{{ metrics | default("[More Information Needed]", true) }} + +## Dataset +Dataset name: {{ dataset | default("[More Information Needed]", true) }} + +## More Information +- Library: {{ repo_url | default("[More Information Needed]", true) }} +- Docs: {{ docs_url | default("[More Information Needed]", true) }} + +This model has been pushed to the Hub using the [PytorchModelHubMixin](https://huggingface.co/docs/huggingface_hub/package_reference/mixins#huggingface_hub.PyTorchModelHubMixin) +""" + + +def _format_parameters(parameters: dict): + params = {k: v for k, v in parameters.items() if not k.startswith("_")} + params = [ + f'"{k}": {v}' if not isinstance(v, str) else f'"{k}": "{v}"' + for k, v in params.items() + ] + params = ",\n".join([f" {param}" for param in params]) + params = "{\n" + f"{params}" + "\n}" + return params + + +class SMPHubMixin(PyTorchModelHubMixin): + def generate_model_card(self, *args, **kwargs) -> ModelCard: + model_parameters_json = _format_parameters(self._hub_mixin_config) + directory = self._save_directory if hasattr(self, "_save_directory") else None + repo_id = self._repo_id if hasattr(self, "_repo_id") else None + repo_or_directory = repo_id if repo_id is not None else directory + + metrics = self._metrics if hasattr(self, "_metrics") else None + dataset = self._dataset if hasattr(self, "_dataset") else None + + if metrics is not None: + metrics = json.dumps(metrics, indent=4) + metrics = f"```json\n{metrics}\n```" + + model_card_data = ModelCardData( + languages=["python"], + library_name="segmentation-models-pytorch", + license="mit", + tags=["semantic-segmentation", "pytorch", "segmentation-models-pytorch"], + pipeline_tag="image-segmentation", + ) + model_card = ModelCard.from_template( + card_data=model_card_data, + template_str=MODEL_CARD, + repo_url="https://github.com/qubvel/segmentation_models.pytorch", + docs_url="https://smp.readthedocs.io/en/latest/", + model_parameters=model_parameters_json, + save_directory=repo_or_directory, + model_name=self.__class__.__name__, + metrics=metrics, + dataset=dataset, + ) + return model_card + + def _set_attrs_from_kwargs(self, attrs, kwargs): + for attr in attrs: + if attr in kwargs: + setattr(self, f"_{attr}", kwargs.pop(attr)) + + def _del_attrs(self, attrs): + for attr in attrs: + if hasattr(self, f"_{attr}"): + delattr(self, f"_{attr}") + + @wraps(PyTorchModelHubMixin.save_pretrained) + def save_pretrained( + self, save_directory: Union[str, Path], *args, **kwargs + ) -> Optional[str]: + # set additional attributes to be used in generate_model_card + self._save_directory = save_directory + self._set_attrs_from_kwargs(["metrics", "dataset"], kwargs) + + # set additional attribute to be used in from_pretrained + self._hub_mixin_config["_model_class"] = self.__class__.__name__ + + try: + # call the original save_pretrained + result = super().save_pretrained(save_directory, *args, **kwargs) + finally: + # delete the additional attributes + self._del_attrs(["save_directory", "metrics", "dataset"]) + self._hub_mixin_config.pop("_model_class") + + return result + + @wraps(PyTorchModelHubMixin.push_to_hub) + def push_to_hub(self, repo_id: str, *args, **kwargs): + self._repo_id = repo_id + self._set_attrs_from_kwargs(["metrics", "dataset"], kwargs) + result 
= super().push_to_hub(repo_id, *args, **kwargs) + self._del_attrs(["repo_id", "metrics", "dataset"]) + return result + + @property + def config(self): + return self._hub_mixin_config + + +@wraps(PyTorchModelHubMixin.from_pretrained) +def from_pretrained(pretrained_model_name_or_path: str, *args, **kwargs): + config_path = hf_hub_download( + pretrained_model_name_or_path, + filename="config.json", + revision=kwargs.get("revision", None), + ) + with open(config_path, "r") as f: + config = json.load(f) + model_class_name = config.pop("_model_class") + + import feature_extractor_models as smp + + model_class = getattr(smp, model_class_name) + return model_class.from_pretrained(pretrained_model_name_or_path, *args, **kwargs) diff --git a/feature_extractor_models/base/initialization.py b/feature_extractor_models/base/initialization.py new file mode 100644 index 0000000000000000000000000000000000000000..4bea4aa6aebf19ffbe1ccc0690566d01f91565ef --- /dev/null +++ b/feature_extractor_models/base/initialization.py @@ -0,0 +1,26 @@ +import torch.nn as nn + + +def initialize_decoder(module): + for m in module.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_uniform_(m.weight, mode="fan_in", nonlinearity="relu") + if m.bias is not None: + nn.init.constant_(m.bias, 0) + + elif isinstance(m, nn.BatchNorm2d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + + elif isinstance(m, nn.Linear): + nn.init.xavier_uniform_(m.weight) + if m.bias is not None: + nn.init.constant_(m.bias, 0) + + +def initialize_head(module): + for m in module.modules(): + if isinstance(m, (nn.Linear, nn.Conv2d)): + nn.init.xavier_uniform_(m.weight) + if m.bias is not None: + nn.init.constant_(m.bias, 0) diff --git a/feature_extractor_models/base/model.py b/feature_extractor_models/base/model.py new file mode 100644 index 0000000000000000000000000000000000000000..2724948e8e747296d0e03c9a8d557fd898b86ba5 --- /dev/null +++ b/feature_extractor_models/base/model.py @@ -0,0 +1,71 @@ +import torch + +from . import initialization as init +from .hub_mixin import SMPHubMixin +import torch.nn as nn + +class SegmentationModel(torch.nn.Module, SMPHubMixin): + def initialize(self): + + # self.out = nn.Sequential( + # nn.Conv2d(out_channels, out_channels, 3, padding=1, bias=False), + # nn.BatchNorm2d(8), + # nn.ReLU(inplace=True), + # ) + init.initialize_decoder(self.decoder) + init.initialize_head(self.segmentation_head) + if self.classification_head is not None: + init.initialize_head(self.classification_head) + + def check_input_shape(self, x): + h, w = x.shape[-2:] + output_stride = self.encoder.output_stride + if h % output_stride != 0 or w % output_stride != 0: + new_h = ( + (h // output_stride + 1) * output_stride + if h % output_stride != 0 + else h + ) + new_w = ( + (w // output_stride + 1) * output_stride + if w % output_stride != 0 + else w + ) + raise RuntimeError( + f"Wrong input shape height={h}, width={w}. Expected image height and width " + f"divisible by {output_stride}. Consider pad your images to shape ({new_h}, {new_w})." + ) + + def forward(self, x): + """Sequentially pass `x` trough model`s encoder, decoder and heads""" + + self.check_input_shape(x) + + features = self.encoder(x) + decoder_output = self.decoder(*features) + + decoder_output = self.segmentation_head(decoder_output) + # + # if self.classification_head is not None: + # labels = self.classification_head(features[-1]) + # return masks, labels + + return decoder_output + + @torch.no_grad() + def predict(self, x): + """Inference method. 
Switch model to `eval` mode, call `.forward(x)` with `torch.no_grad()` + + Args: + x: 4D torch tensor with shape (batch_size, channels, height, width) + + Return: + prediction: 4D torch tensor with shape (batch_size, classes, height, width) + + """ + if self.training: + self.eval() + + x = self.forward(x) + + return x diff --git a/feature_extractor_models/base/modules.py b/feature_extractor_models/base/modules.py new file mode 100644 index 0000000000000000000000000000000000000000..51c1400016cf519b57a31d7c602b0e7a11a9a670 --- /dev/null +++ b/feature_extractor_models/base/modules.py @@ -0,0 +1,131 @@ +import torch +import torch.nn as nn + +try: + from inplace_abn import InPlaceABN +except ImportError: + InPlaceABN = None + + +class Conv2dReLU(nn.Sequential): + def __init__( + self, + in_channels, + out_channels, + kernel_size, + padding=0, + stride=1, + use_batchnorm=True, + ): + if use_batchnorm == "inplace" and InPlaceABN is None: + raise RuntimeError( + "In order to use `use_batchnorm='inplace'` inplace_abn package must be installed. " + + "To install see: https://github.com/mapillary/inplace_abn" + ) + + conv = nn.Conv2d( + in_channels, + out_channels, + kernel_size, + stride=stride, + padding=padding, + bias=not (use_batchnorm), + ) + relu = nn.ReLU(inplace=True) + + if use_batchnorm == "inplace": + bn = InPlaceABN(out_channels, activation="leaky_relu", activation_param=0.0) + relu = nn.Identity() + + elif use_batchnorm and use_batchnorm != "inplace": + bn = nn.BatchNorm2d(out_channels) + + else: + bn = nn.Identity() + + super(Conv2dReLU, self).__init__(conv, bn, relu) + + +class SCSEModule(nn.Module): + def __init__(self, in_channels, reduction=16): + super().__init__() + self.cSE = nn.Sequential( + nn.AdaptiveAvgPool2d(1), + nn.Conv2d(in_channels, in_channels // reduction, 1), + nn.ReLU(inplace=True), + nn.Conv2d(in_channels // reduction, in_channels, 1), + nn.Sigmoid(), + ) + self.sSE = nn.Sequential(nn.Conv2d(in_channels, 1, 1), nn.Sigmoid()) + + def forward(self, x): + return x * self.cSE(x) + x * self.sSE(x) + + +class ArgMax(nn.Module): + def __init__(self, dim=None): + super().__init__() + self.dim = dim + + def forward(self, x): + return torch.argmax(x, dim=self.dim) + + +class Clamp(nn.Module): + def __init__(self, min=0, max=1): + super().__init__() + self.min, self.max = min, max + + def forward(self, x): + return torch.clamp(x, self.min, self.max) + + +class Activation(nn.Module): + def __init__(self, name, **params): + super().__init__() + + if name is None or name == "identity": + self.activation = nn.Identity(**params) + elif name == "sigmoid": + self.activation = nn.Sigmoid() + elif name == "relu": + self.activation = nn.ReLU(inplace=True) + elif name == "softmax2d": + self.activation = nn.Softmax(dim=1, **params) + elif name == "softmax": + self.activation = nn.Softmax(**params) + elif name == "logsoftmax": + self.activation = nn.LogSoftmax(**params) + elif name == "tanh": + self.activation = nn.Tanh() + elif name == "argmax": + self.activation = ArgMax(**params) + elif name == "argmax2d": + self.activation = ArgMax(dim=1, **params) + elif name == "clamp": + self.activation = Clamp(**params) + elif callable(name): + self.activation = name(**params) + else: + raise ValueError( + f"Activation should be callable/sigmoid/softmax/logsoftmax/tanh/" + f"argmax/argmax2d/clamp/None; got {name}" + ) + + def forward(self, x): + return self.activation(x) + + +class Attention(nn.Module): + def __init__(self, name, **params): + super().__init__() + + if name is None: + 
self.attention = nn.Identity(**params) + elif name == "scse": + self.attention = SCSEModule(**params) + else: + raise ValueError("Attention {} is not implemented".format(name)) + + def forward(self, x): + return self.attention(x) diff --git a/feature_extractor_models/decoders/__init__.py b/feature_extractor_models/decoders/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/feature_extractor_models/decoders/deeplabv3/__init__.py b/feature_extractor_models/decoders/deeplabv3/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..723fa4cc4cd98bf941d37e622dbc2fac5d5938a8 --- /dev/null +++ b/feature_extractor_models/decoders/deeplabv3/__init__.py @@ -0,0 +1,3 @@ +from .model import DeepLabV3, DeepLabV3Plus + +__all__ = ["DeepLabV3", "DeepLabV3Plus"] diff --git a/feature_extractor_models/decoders/deeplabv3/decoder.py b/feature_extractor_models/decoders/deeplabv3/decoder.py new file mode 100644 index 0000000000000000000000000000000000000000..2bec43c98d3c7d002ea357e48682d8a9af48b145 --- /dev/null +++ b/feature_extractor_models/decoders/deeplabv3/decoder.py @@ -0,0 +1,220 @@ +""" +BSD 3-Clause License + +Copyright (c) Soumith Chintala 2016, +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+""" + +import torch +from torch import nn +from torch.nn import functional as F + +__all__ = ["DeepLabV3Decoder"] + + +class DeepLabV3Decoder(nn.Sequential): + def __init__(self, in_channels, out_channels=256, atrous_rates=(12, 24, 36)): + super().__init__( + ASPP(in_channels, out_channels, atrous_rates), + nn.Conv2d(out_channels, out_channels, 3, padding=1, bias=False), + nn.BatchNorm2d(out_channels), + nn.ReLU(), + ) + self.out_channels = out_channels + + def forward(self, *features): + return super().forward(features[-1]) + + +class DeepLabV3PlusDecoder(nn.Module): + def __init__( + self, + encoder_channels, + out_channels=256, + atrous_rates=(12, 24, 36), + output_stride=16, + ): + super().__init__() + if output_stride not in {8, 16}: + raise ValueError( + "Output stride should be 8 or 16, got {}.".format(output_stride) + ) + + self.out_channels = out_channels + self.output_stride = output_stride + + self.aspp = nn.Sequential( + ASPP(encoder_channels[-1], out_channels, atrous_rates, separable=True), + SeparableConv2d( + out_channels, out_channels, kernel_size=3, padding=1, bias=False + ), + nn.BatchNorm2d(out_channels), + nn.ReLU(), + ) + + scale_factor = 2 if output_stride == 8 else 4 + self.up = nn.UpsamplingBilinear2d(scale_factor=scale_factor) + + highres_in_channels = encoder_channels[-4] + highres_out_channels = 48 # proposed by authors of paper + self.block1 = nn.Sequential( + nn.Conv2d( + highres_in_channels, highres_out_channels, kernel_size=1, bias=False + ), + nn.BatchNorm2d(highres_out_channels), + nn.ReLU(), + ) + self.block2 = nn.Sequential( + SeparableConv2d( + highres_out_channels + out_channels, + out_channels, + kernel_size=3, + padding=1, + bias=False, + ), + nn.BatchNorm2d(out_channels), + nn.ReLU(), + ) + + def forward(self, *features): + aspp_features = self.aspp(features[-1]) + aspp_features = self.up(aspp_features) + high_res_features = self.block1(features[-4]) + concat_features = torch.cat([aspp_features, high_res_features], dim=1) + fused_features = self.block2(concat_features) + return fused_features + + +class ASPPConv(nn.Sequential): + def __init__(self, in_channels, out_channels, dilation): + super().__init__( + nn.Conv2d( + in_channels, + out_channels, + kernel_size=3, + padding=dilation, + dilation=dilation, + bias=False, + ), + nn.BatchNorm2d(out_channels), + nn.ReLU(), + ) + + +class ASPPSeparableConv(nn.Sequential): + def __init__(self, in_channels, out_channels, dilation): + super().__init__( + SeparableConv2d( + in_channels, + out_channels, + kernel_size=3, + padding=dilation, + dilation=dilation, + bias=False, + ), + nn.BatchNorm2d(out_channels), + nn.ReLU(), + ) + + +class ASPPPooling(nn.Sequential): + def __init__(self, in_channels, out_channels): + super().__init__( + nn.AdaptiveAvgPool2d(1), + nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False), + nn.BatchNorm2d(out_channels), + nn.ReLU(), + ) + + def forward(self, x): + size = x.shape[-2:] + for mod in self: + x = mod(x) + return F.interpolate(x, size=size, mode="bilinear", align_corners=False) + + +class ASPP(nn.Module): + def __init__(self, in_channels, out_channels, atrous_rates, separable=False): + super(ASPP, self).__init__() + modules = [] + modules.append( + nn.Sequential( + nn.Conv2d(in_channels, out_channels, 1, bias=False), + nn.BatchNorm2d(out_channels), + nn.ReLU(), + ) + ) + + rate1, rate2, rate3 = tuple(atrous_rates) + ASPPConvModule = ASPPConv if not separable else ASPPSeparableConv + + modules.append(ASPPConvModule(in_channels, out_channels, rate1)) + 
modules.append(ASPPConvModule(in_channels, out_channels, rate2)) + modules.append(ASPPConvModule(in_channels, out_channels, rate3)) + modules.append(ASPPPooling(in_channels, out_channels)) + + self.convs = nn.ModuleList(modules) + + self.project = nn.Sequential( + nn.Conv2d(5 * out_channels, out_channels, kernel_size=1, bias=False), + nn.BatchNorm2d(out_channels), + nn.ReLU(), + nn.Dropout(0.5), + ) + + def forward(self, x): + res = [] + for conv in self.convs: + res.append(conv(x)) + res = torch.cat(res, dim=1) + return self.project(res) + + +class SeparableConv2d(nn.Sequential): + def __init__( + self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + bias=True, + ): + dephtwise_conv = nn.Conv2d( + in_channels, + in_channels, + kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + groups=in_channels, + bias=False, + ) + pointwise_conv = nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=bias) + super().__init__(dephtwise_conv, pointwise_conv) diff --git a/feature_extractor_models/decoders/deeplabv3/model.py b/feature_extractor_models/decoders/deeplabv3/model.py new file mode 100644 index 0000000000000000000000000000000000000000..f23c9167e98c0091d76bf388bcd51d481f1a361e --- /dev/null +++ b/feature_extractor_models/decoders/deeplabv3/model.py @@ -0,0 +1,178 @@ +from typing import Optional + +from feature_extractor_models.base import ( + SegmentationModel, + SegmentationHead, + ClassificationHead, +) +from feature_extractor_models.encoders import get_encoder +from .decoder import DeepLabV3Decoder, DeepLabV3PlusDecoder + + +class DeepLabV3(SegmentationModel): + """DeepLabV3_ implementation from "Rethinking Atrous Convolution for Semantic Image Segmentation" + + Args: + encoder_name: Name of the classification model that will be used as an encoder (a.k.a backbone) + to extract features of different spatial resolution + encoder_depth: A number of stages used in encoder in range [3, 5]. Each stage generate features + two times smaller in spatial dimensions than previous one (e.g. for depth 0 we will have features + with shapes [(N, C, H, W),], for depth 1 - [(N, C, H, W), (N, C, H // 2, W // 2)] and so on). + Default is 5 + encoder_weights: One of **None** (random initialization), **"imagenet"** (pre-training on ImageNet) and + other pretrained weights (see table with available weights for each encoder_name) + decoder_channels: A number of convolution filters in ASPP module. Default is 256 + in_channels: A number of input channels for the model, default is 3 (RGB images) + classes: A number of classes for output mask (or you can think as a number of channels of output mask) + activation: An activation function to apply after the final convolution layer. + Available options are **"sigmoid"**, **"softmax"**, **"logsoftmax"**, **"tanh"**, **"identity"**, + **callable** and **None**. + Default is **None** + upsampling: Final upsampling factor. Default is 8 to preserve input-output spatial shape identity + aux_params: Dictionary with parameters of the auxiliary output (classification head). Auxiliary output is build + on top of encoder if **aux_params** is not **None** (default). Supported params: + - classes (int): A number of classes + - pooling (str): One of "max", "avg". Default is "avg" + - dropout (float): Dropout factor in [0, 1) + - activation (str): An activation function to apply "sigmoid"/"softmax" + (could be **None** to return logits) + Returns: + ``torch.nn.Module``: **DeepLabV3** + + .. 
_DeeplabV3: + https://arxiv.org/abs/1706.05587 + + """ + + def __init__( + self, + encoder_name: str = "resnet34", + encoder_depth: int = 5, + encoder_weights: Optional[str] = "imagenet", + decoder_channels: int = 256, + in_channels: int = 3, + classes: int = 1, + activation: Optional[str] = None, + upsampling: int = 8, + aux_params: Optional[dict] = None, + ): + super().__init__() + + self.encoder = get_encoder( + encoder_name, + in_channels=in_channels, + depth=encoder_depth, + weights=encoder_weights, + output_stride=8, + ) + + self.decoder = DeepLabV3Decoder( + in_channels=self.encoder.out_channels[-1], out_channels=decoder_channels + ) + + self.segmentation_head = SegmentationHead( + in_channels=self.decoder.out_channels, + out_channels=classes, + activation=activation, + kernel_size=1, + upsampling=upsampling, + ) + + if aux_params is not None: + self.classification_head = ClassificationHead( + in_channels=self.encoder.out_channels[-1], **aux_params + ) + else: + self.classification_head = None + + +class DeepLabV3Plus(SegmentationModel): + """DeepLabV3+ implementation from "Encoder-Decoder with Atrous Separable + Convolution for Semantic Image Segmentation" + + Args: + encoder_name: Name of the classification model that will be used as an encoder (a.k.a backbone) + to extract features of different spatial resolution + encoder_depth: A number of stages used in encoder in range [3, 5]. Each stage generate features + two times smaller in spatial dimensions than previous one (e.g. for depth 0 we will have features + with shapes [(N, C, H, W),], for depth 1 - [(N, C, H, W), (N, C, H // 2, W // 2)] and so on). + Default is 5 + encoder_weights: One of **None** (random initialization), **"imagenet"** (pre-training on ImageNet) and + other pretrained weights (see table with available weights for each encoder_name) + encoder_output_stride: Downsampling factor for last encoder features (see original paper for explanation) + decoder_atrous_rates: Dilation rates for ASPP module (should be a tuple of 3 integer values) + decoder_channels: A number of convolution filters in ASPP module. Default is 256 + in_channels: A number of input channels for the model, default is 3 (RGB images) + classes: A number of classes for output mask (or you can think as a number of channels of output mask) + activation: An activation function to apply after the final convolution layer. + Available options are **"sigmoid"**, **"softmax"**, **"logsoftmax"**, **"tanh"**, **"identity"**, + **callable** and **None**. + Default is **None** + upsampling: Final upsampling factor. Default is 4 to preserve input-output spatial shape identity + aux_params: Dictionary with parameters of the auxiliary output (classification head). Auxiliary output is build + on top of encoder if **aux_params** is not **None** (default). Supported params: + - classes (int): A number of classes + - pooling (str): One of "max", "avg". 
Default is "avg" + - dropout (float): Dropout factor in [0, 1) + - activation (str): An activation function to apply "sigmoid"/"softmax" + (could be **None** to return logits) + Returns: + ``torch.nn.Module``: **DeepLabV3Plus** + + Reference: + https://arxiv.org/abs/1802.02611v3 + + """ + + def __init__( + self, + encoder_name: str = "resnet34", + encoder_depth: int = 5, + encoder_weights: Optional[str] = "imagenet", + encoder_output_stride: int = 16, + decoder_channels: int = 256, + decoder_atrous_rates: tuple = (12, 24, 36), + in_channels: int = 3, + classes: int = 1, + activation: Optional[str] = None, + upsampling: int = 4, + aux_params: Optional[dict] = None, + ): + super().__init__() + + if encoder_output_stride not in [8, 16]: + raise ValueError( + "Encoder output stride should be 8 or 16, got {}".format( + encoder_output_stride + ) + ) + + self.encoder = get_encoder( + encoder_name, + in_channels=in_channels, + depth=encoder_depth, + weights=encoder_weights, + output_stride=encoder_output_stride, + ) + + self.decoder = DeepLabV3PlusDecoder( + encoder_channels=self.encoder.out_channels, + out_channels=decoder_channels, + atrous_rates=decoder_atrous_rates, + output_stride=encoder_output_stride, + ) + + self.segmentation_head = SegmentationHead( + in_channels=self.decoder.out_channels, + out_channels=classes, + activation=activation, + kernel_size=1, + upsampling=upsampling, + ) + + if aux_params is not None: + self.classification_head = ClassificationHead( + in_channels=self.encoder.out_channels[-1], **aux_params + ) + else: + self.classification_head = None diff --git a/feature_extractor_models/decoders/fpn/__init__.py b/feature_extractor_models/decoders/fpn/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ad4be9db3bbd683a825e6dd17b84bf72f0228b16 --- /dev/null +++ b/feature_extractor_models/decoders/fpn/__init__.py @@ -0,0 +1,3 @@ +from .model import FPN + +__all__ = ["FPN"] diff --git a/feature_extractor_models/decoders/fpn/decoder.py b/feature_extractor_models/decoders/fpn/decoder.py new file mode 100644 index 0000000000000000000000000000000000000000..766190f45c32c6f9e4eb6d03632585c339fb81fa --- /dev/null +++ b/feature_extractor_models/decoders/fpn/decoder.py @@ -0,0 +1,133 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class Conv3x3GNReLU(nn.Module): + def __init__(self, in_channels, out_channels, upsample=False): + super().__init__() + self.upsample = upsample + self.block = nn.Sequential( + nn.Conv2d( + in_channels, out_channels, (3, 3), stride=1, padding=1, bias=False + ), + nn.GroupNorm(32, out_channels), + nn.ReLU(inplace=True), + ) + + def forward(self, x): + x = self.block(x) + if self.upsample: + x = F.interpolate(x, scale_factor=2, mode="bilinear", align_corners=True) + return x + + +class FPNBlock(nn.Module): + def __init__(self, pyramid_channels, skip_channels): + super().__init__() + self.skip_conv = nn.Conv2d(skip_channels, pyramid_channels, kernel_size=1) + + def forward(self, x, skip=None): + x = F.interpolate(x, scale_factor=2, mode="nearest") + skip = self.skip_conv(skip) + x = x + skip + return x + + +class SegmentationBlock(nn.Module): + def __init__(self, in_channels, out_channels, n_upsamples=0): + super().__init__() + + blocks = [Conv3x3GNReLU(in_channels, out_channels, upsample=bool(n_upsamples))] + + if n_upsamples > 1: + for _ in range(1, n_upsamples): + blocks.append(Conv3x3GNReLU(out_channels, out_channels, upsample=True)) + + self.block = nn.Sequential(*blocks) + + def forward(self, x): + 
return self.block(x) + + +class MergeBlock(nn.Module): + def __init__(self, policy): + super().__init__() + if policy not in ["add", "cat"]: + raise ValueError( + "`merge_policy` must be one of: ['add', 'cat'], got {}".format(policy) + ) + self.policy = policy + + def forward(self, x): + if self.policy == "add": + return sum(x) + elif self.policy == "cat": + return torch.cat(x, dim=1) + else: + raise ValueError( + "`merge_policy` must be one of: ['add', 'cat'], got {}".format( + self.policy + ) + ) + + +class FPNDecoder(nn.Module): + def __init__( + self, + encoder_channels, + encoder_depth=5, + pyramid_channels=256, + segmentation_channels=128, + dropout=0.2, + merge_policy="add", + ): + super().__init__() + + self.out_channels = ( + segmentation_channels + if merge_policy == "add" + else segmentation_channels * 4 + ) + if encoder_depth < 3: + raise ValueError( + "Encoder depth for FPN decoder cannot be less than 3, got {}.".format( + encoder_depth + ) + ) + + encoder_channels = encoder_channels[::-1] + encoder_channels = encoder_channels[: encoder_depth + 1] + + self.p5 = nn.Conv2d(encoder_channels[0], pyramid_channels, kernel_size=1) + self.p4 = FPNBlock(pyramid_channels, encoder_channels[1]) + self.p3 = FPNBlock(pyramid_channels, encoder_channels[2]) + self.p2 = FPNBlock(pyramid_channels, encoder_channels[3]) + + self.seg_blocks = nn.ModuleList( + [ + SegmentationBlock( + pyramid_channels, segmentation_channels, n_upsamples=n_upsamples + ) + for n_upsamples in [3, 2, 1, 0] + ] + ) + + self.merge = MergeBlock(merge_policy) + self.dropout = nn.Dropout2d(p=dropout, inplace=True) + + def forward(self, *features): + c2, c3, c4, c5 = features[-4:] + + p5 = self.p5(c5) + p4 = self.p4(p5, c4) + p3 = self.p3(p4, c3) + p2 = self.p2(p3, c2) + + feature_pyramid = [ + seg_block(p) for seg_block, p in zip(self.seg_blocks, [p5, p4, p3, p2]) + ] + x = self.merge(feature_pyramid) + x = self.dropout(x) + + return x diff --git a/feature_extractor_models/decoders/fpn/model.py b/feature_extractor_models/decoders/fpn/model.py new file mode 100644 index 0000000000000000000000000000000000000000..5da47a65185a2dd679bd615bfada0fa757873ad6 --- /dev/null +++ b/feature_extractor_models/decoders/fpn/model.py @@ -0,0 +1,107 @@ +from typing import Optional + +from feature_extractor_models.base import ( + SegmentationModel, + SegmentationHead, + ClassificationHead, +) +from feature_extractor_models.encoders import get_encoder +from .decoder import FPNDecoder + + +class FPN(SegmentationModel): + """FPN_ is a fully convolution neural network for image semantic segmentation. + + Args: + encoder_name: Name of the classification model that will be used as an encoder (a.k.a backbone) + to extract features of different spatial resolution + encoder_depth: A number of stages used in encoder in range [3, 5]. Each stage generate features + two times smaller in spatial dimensions than previous one (e.g. for depth 0 we will have features + with shapes [(N, C, H, W),], for depth 1 - [(N, C, H, W), (N, C, H // 2, W // 2)] and so on). + Default is 5 + encoder_weights: One of **None** (random initialization), **"imagenet"** (pre-training on ImageNet) and + other pretrained weights (see table with available weights for each encoder_name) + decoder_pyramid_channels: A number of convolution filters in Feature Pyramid of FPN_ + decoder_segmentation_channels: A number of convolution filters in segmentation blocks of FPN_ + decoder_merge_policy: Determines how to merge pyramid features inside FPN. 
Available options are **add** + and **cat** + decoder_dropout: Spatial dropout rate in range (0, 1) for feature pyramid in FPN_ + in_channels: A number of input channels for the model, default is 3 (RGB images) + classes: A number of classes for output mask (or you can think as a number of channels of output mask) + activation: An activation function to apply after the final convolution layer. + Available options are **"sigmoid"**, **"softmax"**, **"logsoftmax"**, **"tanh"**, **"identity"**, + **callable** and **None**. + Default is **None** + upsampling: Final upsampling factor. Default is 4 to preserve input-output spatial shape identity + aux_params: Dictionary with parameters of the auxiliary output (classification head). Auxiliary output is build + on top of encoder if **aux_params** is not **None** (default). Supported params: + - classes (int): A number of classes + - pooling (str): One of "max", "avg". Default is "avg" + - dropout (float): Dropout factor in [0, 1) + - activation (str): An activation function to apply "sigmoid"/"softmax" + (could be **None** to return logits) + + Returns: + ``torch.nn.Module``: **FPN** + + .. _FPN: + http://presentations.cocodataset.org/COCO17-Stuff-FAIR.pdf + + """ + + def __init__( + self, + encoder_name: str = "resnet34", + encoder_depth: int = 5, + encoder_weights: Optional[str] = "imagenet", + decoder_pyramid_channels: int = 256, + decoder_segmentation_channels: int = 128, + decoder_merge_policy: str = "add", + decoder_dropout: float = 0.2, + in_channels: int = 3, + classes: int = 1, + activation: Optional[str] = None, + upsampling: int = 4, + aux_params: Optional[dict] = None, + ): + super().__init__() + + # validate input params + if encoder_name.startswith("mit_b") and encoder_depth != 5: + raise ValueError( + "Encoder {} support only encoder_depth=5".format(encoder_name) + ) + + self.encoder = get_encoder( + encoder_name, + in_channels=in_channels, + depth=encoder_depth, + weights=encoder_weights, + ) + + self.decoder = FPNDecoder( + encoder_channels=self.encoder.out_channels, + encoder_depth=encoder_depth, + pyramid_channels=decoder_pyramid_channels, + segmentation_channels=decoder_segmentation_channels, + dropout=decoder_dropout, + merge_policy=decoder_merge_policy, + ) + + self.segmentation_head = SegmentationHead( + in_channels=self.decoder.out_channels, + out_channels=classes, + activation=activation, + kernel_size=1, + upsampling=upsampling, + ) + + if aux_params is not None: + self.classification_head = ClassificationHead( + in_channels=self.encoder.out_channels[-1], **aux_params + ) + else: + self.classification_head = None + + self.name = "fpn-{}".format(encoder_name) + self.initialize() diff --git a/feature_extractor_models/decoders/lightfpn/__init__.py b/feature_extractor_models/decoders/lightfpn/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..c9741e035896abe645de6760c1b756f7f7bbe761 --- /dev/null +++ b/feature_extractor_models/decoders/lightfpn/__init__.py @@ -0,0 +1,3 @@ +from .model import LightFPN + +__all__ = ["LightFPN"] diff --git a/feature_extractor_models/decoders/lightfpn/decoder.py b/feature_extractor_models/decoders/lightfpn/decoder.py new file mode 100644 index 0000000000000000000000000000000000000000..f19d5ae408026f8ae08c8b69d37aa61578b7278c --- /dev/null +++ b/feature_extractor_models/decoders/lightfpn/decoder.py @@ -0,0 +1,144 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class Conv3x3GNReLU(nn.Module): + def __init__(self, in_channels, out_channels, 
upsample=False): + super().__init__() + self.upsample = upsample + self.block = nn.Sequential( + nn.Conv2d( + in_channels, out_channels, (3, 3), stride=1, padding=1, bias=False + ), + nn.GroupNorm(32, out_channels), + nn.ReLU(inplace=True), + ) + + def forward(self, x): + x = self.block(x) + if self.upsample: + x = F.interpolate(x, scale_factor=2, mode="bilinear", align_corners=True) + return x + + +class DepthwiseSeparableConv2d(nn.Module): + def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0): + super().__init__() + self.depthwise = nn.Conv2d(in_channels, in_channels, kernel_size, stride, padding, groups=in_channels) + self.pointwise = nn.Conv2d(in_channels, out_channels, 1) + + def forward(self, x): + x = self.depthwise(x) + x = self.pointwise(x) + return x + +class LightFPNBlock(nn.Module): + def __init__(self, pyramid_channels, skip_channels): + super().__init__() + self.skip_conv = DepthwiseSeparableConv2d(skip_channels, pyramid_channels, kernel_size=1) + + def forward(self, x, skip=None): + x = F.interpolate(x, scale_factor=2, mode="nearest") + skip = self.skip_conv(skip) + x = x + skip + return x + + +class SegmentationBlock(nn.Module): + def __init__(self, in_channels, out_channels, n_upsamples=0): + super().__init__() + + blocks = [Conv3x3GNReLU(in_channels, out_channels, upsample=bool(n_upsamples))] + + if n_upsamples > 1: + for _ in range(1, n_upsamples): + blocks.append(Conv3x3GNReLU(out_channels, out_channels, upsample=True)) + + self.block = nn.Sequential(*blocks) + + def forward(self, x): + return self.block(x) + + +class MergeBlock(nn.Module): + def __init__(self, policy): + super().__init__() + if policy not in ["add", "cat"]: + raise ValueError( + "`merge_policy` must be one of: ['add', 'cat'], got {}".format(policy) + ) + self.policy = policy + + def forward(self, x): + if self.policy == "add": + return sum(x) + elif self.policy == "cat": + return torch.cat(x, dim=1) + else: + raise ValueError( + "`merge_policy` must be one of: ['add', 'cat'], got {}".format( + self.policy + ) + ) + + +class FPNDecoder(nn.Module): + def __init__( + self, + encoder_channels, + encoder_depth=5, + pyramid_channels=256, + segmentation_channels=128, + dropout=0.2, + merge_policy="add", + ): + super().__init__() + + self.out_channels = ( + segmentation_channels + if merge_policy == "add" + else segmentation_channels * 4 + ) + if encoder_depth < 3: + raise ValueError( + "Encoder depth for FPN decoder cannot be less than 3, got {}.".format( + encoder_depth + ) + ) + + encoder_channels = encoder_channels[::-1] + encoder_channels = encoder_channels[: encoder_depth + 1] + + self.p5 = nn.Conv2d(encoder_channels[0], pyramid_channels, kernel_size=1) + self.p4 = LightFPNBlock(pyramid_channels, encoder_channels[1]) + self.p3 = LightFPNBlock(pyramid_channels, encoder_channels[2]) + self.p2 = LightFPNBlock(pyramid_channels, encoder_channels[3]) + + self.seg_blocks = nn.ModuleList( + [ + SegmentationBlock( + pyramid_channels, segmentation_channels, n_upsamples=n_upsamples + ) + for n_upsamples in [3, 2, 1, 0] + ] + ) + + self.merge = MergeBlock(merge_policy) + self.dropout = nn.Dropout2d(p=dropout, inplace=True) + + def forward(self, *features): + c2, c3, c4, c5 = features[-4:] + + p5 = self.p5(c5) + p4 = self.p4(p5, c4) + p3 = self.p3(p4, c3) + p2 = self.p2(p3, c2) + + feature_pyramid = [ + seg_block(p) for seg_block, p in zip(self.seg_blocks, [p5, p4, p3, p2]) + ] + x = self.merge(feature_pyramid) + x = self.dropout(x) + + return x diff --git 
a/feature_extractor_models/decoders/lightfpn/model.py b/feature_extractor_models/decoders/lightfpn/model.py new file mode 100644 index 0000000000000000000000000000000000000000..da19ea13add202458387ee40b55b82ed5c309dca --- /dev/null +++ b/feature_extractor_models/decoders/lightfpn/model.py @@ -0,0 +1,107 @@ +from typing import Optional + +from feature_extractor_models.base import ( + SegmentationModel, + SegmentationHead, + ClassificationHead, +) +from feature_extractor_models.encoders import get_encoder +from .decoder import FPNDecoder + + +class LightFPN(SegmentationModel): + """FPN_ is a fully convolution neural network for image semantic segmentation. + + Args: + encoder_name: Name of the classification model that will be used as an encoder (a.k.a backbone) + to extract features of different spatial resolution + encoder_depth: A number of stages used in encoder in range [3, 5]. Each stage generate features + two times smaller in spatial dimensions than previous one (e.g. for depth 0 we will have features + with shapes [(N, C, H, W),], for depth 1 - [(N, C, H, W), (N, C, H // 2, W // 2)] and so on). + Default is 5 + encoder_weights: One of **None** (random initialization), **"imagenet"** (pre-training on ImageNet) and + other pretrained weights (see table with available weights for each encoder_name) + decoder_pyramid_channels: A number of convolution filters in Feature Pyramid of FPN_ + decoder_segmentation_channels: A number of convolution filters in segmentation blocks of FPN_ + decoder_merge_policy: Determines how to merge pyramid features inside FPN. Available options are **add** + and **cat** + decoder_dropout: Spatial dropout rate in range (0, 1) for feature pyramid in FPN_ + in_channels: A number of input channels for the model, default is 3 (RGB images) + classes: A number of classes for output mask (or you can think as a number of channels of output mask) + activation: An activation function to apply after the final convolution layer. + Available options are **"sigmoid"**, **"softmax"**, **"logsoftmax"**, **"tanh"**, **"identity"**, + **callable** and **None**. + Default is **None** + upsampling: Final upsampling factor. Default is 4 to preserve input-output spatial shape identity + aux_params: Dictionary with parameters of the auxiliary output (classification head). Auxiliary output is build + on top of encoder if **aux_params** is not **None** (default). Supported params: + - classes (int): A number of classes + - pooling (str): One of "max", "avg". Default is "avg" + - dropout (float): Dropout factor in [0, 1) + - activation (str): An activation function to apply "sigmoid"/"softmax" + (could be **None** to return logits) + + Returns: + ``torch.nn.Module``: **FPN** + + .. 
_FPN: + http://presentations.cocodataset.org/COCO17-Stuff-FAIR.pdf + + """ + + def __init__( + self, + encoder_name: str = "resnet34", + encoder_depth: int = 5, + encoder_weights: Optional[str] = "imagenet", + decoder_pyramid_channels: int = 256, + decoder_segmentation_channels: int = 128, + decoder_merge_policy: str = "add", + decoder_dropout: float = 0.2, + in_channels: int = 3, + classes: int = 1, + activation: Optional[str] = None, + upsampling: int = 4, + aux_params: Optional[dict] = None, + ): + super().__init__() + + # validate input params + if encoder_name.startswith("mit_b") and encoder_depth != 5: + raise ValueError( + "Encoder {} support only encoder_depth=5".format(encoder_name) + ) + + self.encoder = get_encoder( + encoder_name, + in_channels=in_channels, + depth=encoder_depth, + weights=encoder_weights, + ) + + self.decoder = FPNDecoder( + encoder_channels=self.encoder.out_channels, + encoder_depth=encoder_depth, + pyramid_channels=decoder_pyramid_channels, + segmentation_channels=decoder_segmentation_channels, + dropout=decoder_dropout, + merge_policy=decoder_merge_policy, + ) + + self.segmentation_head = SegmentationHead( + in_channels=self.decoder.out_channels, + out_channels=classes, + activation=activation, + kernel_size=1, + upsampling=upsampling, + ) + + if aux_params is not None: + self.classification_head = ClassificationHead( + in_channels=self.encoder.out_channels[-1], **aux_params + ) + else: + self.classification_head = None + + self.name = "fpn-{}".format(encoder_name) + self.initialize() diff --git a/feature_extractor_models/decoders/linknet/__init__.py b/feature_extractor_models/decoders/linknet/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..0e662f80411191b67a1e579efc7e14ef9f15db09 --- /dev/null +++ b/feature_extractor_models/decoders/linknet/__init__.py @@ -0,0 +1,3 @@ +from .model import Linknet + +__all__ = ["Linknet"] diff --git a/feature_extractor_models/decoders/linknet/decoder.py b/feature_extractor_models/decoders/linknet/decoder.py new file mode 100644 index 0000000000000000000000000000000000000000..c70b9c9310373734749c7554780d794305430bd0 --- /dev/null +++ b/feature_extractor_models/decoders/linknet/decoder.py @@ -0,0 +1,82 @@ +import torch.nn as nn + +from feature_extractor_models.base import modules + + +class TransposeX2(nn.Sequential): + def __init__(self, in_channels, out_channels, use_batchnorm=True): + super().__init__() + layers = [ + nn.ConvTranspose2d( + in_channels, out_channels, kernel_size=4, stride=2, padding=1 + ), + nn.ReLU(inplace=True), + ] + + if use_batchnorm: + layers.insert(1, nn.BatchNorm2d(out_channels)) + + super().__init__(*layers) + + +class DecoderBlock(nn.Module): + def __init__(self, in_channels, out_channels, use_batchnorm=True): + super().__init__() + + self.block = nn.Sequential( + modules.Conv2dReLU( + in_channels, + in_channels // 4, + kernel_size=1, + use_batchnorm=use_batchnorm, + ), + TransposeX2( + in_channels // 4, in_channels // 4, use_batchnorm=use_batchnorm + ), + modules.Conv2dReLU( + in_channels // 4, + out_channels, + kernel_size=1, + use_batchnorm=use_batchnorm, + ), + ) + + def forward(self, x, skip=None): + x = self.block(x) + if skip is not None: + x = x + skip + return x + + +class LinknetDecoder(nn.Module): + def __init__( + self, encoder_channels, prefinal_channels=32, n_blocks=5, use_batchnorm=True + ): + super().__init__() + + # remove first skip + encoder_channels = encoder_channels[1:] + # reverse channels to start from head of encoder + encoder_channels = 
encoder_channels[::-1] + + channels = list(encoder_channels) + [prefinal_channels] + + self.blocks = nn.ModuleList( + [ + DecoderBlock(channels[i], channels[i + 1], use_batchnorm=use_batchnorm) + for i in range(n_blocks) + ] + ) + + def forward(self, *features): + features = features[1:] # remove first skip + features = features[::-1] # reverse channels to start from head of encoder + + x = features[0] + skips = features[1:] + + for i, decoder_block in enumerate(self.blocks): + skip = skips[i] if i < len(skips) else None + x = decoder_block(x, skip) + + return x diff --git a/feature_extractor_models/decoders/linknet/model.py b/feature_extractor_models/decoders/linknet/model.py new file mode 100644 index 0000000000000000000000000000000000000000..b73af76157d5325d4b0cb83c06ca17d3ad4a8727 --- /dev/null +++ b/feature_extractor_models/decoders/linknet/model.py @@ -0,0 +1,98 @@ +from typing import Optional, Union + +from feature_extractor_models.base import ( + SegmentationHead, + SegmentationModel, + ClassificationHead, +) +from feature_extractor_models.encoders import get_encoder +from .decoder import LinknetDecoder + + +class Linknet(SegmentationModel): + """Linknet_ is a fully convolution neural network for image semantic segmentation. Consist of *encoder* + and *decoder* parts connected with *skip connections*. Encoder extract features of different spatial + resolution (skip connections) which are used by decoder to define accurate segmentation mask. Use *sum* + for fusing decoder blocks with skip connections. + + Note: + This implementation by default has 4 skip connections (original - 3). + + Args: + encoder_name: Name of the classification model that will be used as an encoder (a.k.a backbone) + to extract features of different spatial resolution + encoder_depth: A number of stages used in encoder in range [3, 5]. Each stage generate features + two times smaller in spatial dimensions than previous one (e.g. for depth 0 we will have features + with shapes [(N, C, H, W),], for depth 1 - [(N, C, H, W), (N, C, H // 2, W // 2)] and so on). + Default is 5 + encoder_weights: One of **None** (random initialization), **"imagenet"** (pre-training on ImageNet) and + other pretrained weights (see table with available weights for each encoder_name) + decoder_use_batchnorm: If **True**, BatchNorm2d layer between Conv2D and Activation layers + is used. If **"inplace"** InplaceABN will be used, allows to decrease memory consumption. + Available options are **True, False, "inplace"** + in_channels: A number of input channels for the model, default is 3 (RGB images) + classes: A number of classes for output mask (or you can think as a number of channels of output mask) + activation: An activation function to apply after the final convolution layer. + Available options are **"sigmoid"**, **"softmax"**, **"logsoftmax"**, **"tanh"**, **"identity"**, + **callable** and **None**. + Default is **None** + aux_params: Dictionary with parameters of the auxiliary output (classification head). Auxiliary output is build + on top of encoder if **aux_params** is not **None** (default). Supported params: + - classes (int): A number of classes + - pooling (str): One of "max", "avg". Default is "avg" + - dropout (float): Dropout factor in [0, 1) + - activation (str): An activation function to apply "sigmoid"/"softmax" + (could be **None** to return logits) + + Returns: + ``torch.nn.Module``: **Linknet** + + .. 
_Linknet: + https://arxiv.org/abs/1707.03718 + """ + + def __init__( + self, + encoder_name: str = "resnet34", + encoder_depth: int = 5, + encoder_weights: Optional[str] = "imagenet", + decoder_use_batchnorm: bool = True, + in_channels: int = 3, + classes: int = 1, + activation: Optional[Union[str, callable]] = None, + aux_params: Optional[dict] = None, + ): + super().__init__() + + if encoder_name.startswith("mit_b"): + raise ValueError( + "Encoder `{}` is not supported for Linknet".format(encoder_name) + ) + + self.encoder = get_encoder( + encoder_name, + in_channels=in_channels, + depth=encoder_depth, + weights=encoder_weights, + ) + + self.decoder = LinknetDecoder( + encoder_channels=self.encoder.out_channels, + n_blocks=encoder_depth, + prefinal_channels=32, + use_batchnorm=decoder_use_batchnorm, + ) + + self.segmentation_head = SegmentationHead( + in_channels=32, out_channels=classes, activation=activation, kernel_size=1 + ) + + if aux_params is not None: + self.classification_head = ClassificationHead( + in_channels=self.encoder.out_channels[-1], **aux_params + ) + else: + self.classification_head = None + + self.name = "link-{}".format(encoder_name) + self.initialize() diff --git a/feature_extractor_models/decoders/manet/__init__.py b/feature_extractor_models/decoders/manet/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a866f2c50d8bb6117edd9f4949e5e114e799dc2b --- /dev/null +++ b/feature_extractor_models/decoders/manet/__init__.py @@ -0,0 +1,3 @@ +from .model import MAnet + +__all__ = ["MAnet"] diff --git a/feature_extractor_models/decoders/manet/decoder.py b/feature_extractor_models/decoders/manet/decoder.py new file mode 100644 index 0000000000000000000000000000000000000000..61d8ccbe5e0afd1792e9efa5b1ba7aacab2b1794 --- /dev/null +++ b/feature_extractor_models/decoders/manet/decoder.py @@ -0,0 +1,187 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +from feature_extractor_models.base import modules as md + + +class PAB(nn.Module): + def __init__(self, in_channels, out_channels, pab_channels=64): + super(PAB, self).__init__() + # Series of 1x1 conv to generate attention feature maps + self.pab_channels = pab_channels + self.in_channels = in_channels + self.top_conv = nn.Conv2d(in_channels, pab_channels, kernel_size=1) + self.center_conv = nn.Conv2d(in_channels, pab_channels, kernel_size=1) + self.bottom_conv = nn.Conv2d(in_channels, in_channels, kernel_size=3, padding=1) + self.map_softmax = nn.Softmax(dim=1) + self.out_conv = nn.Conv2d(in_channels, in_channels, kernel_size=3, padding=1) + + def forward(self, x): + bsize = x.size()[0] + h = x.size()[2] + w = x.size()[3] + x_top = self.top_conv(x) + x_center = self.center_conv(x) + x_bottom = self.bottom_conv(x) + + x_top = x_top.flatten(2) + x_center = x_center.flatten(2).transpose(1, 2) + x_bottom = x_bottom.flatten(2).transpose(1, 2) + + sp_map = torch.matmul(x_center, x_top) + sp_map = self.map_softmax(sp_map.view(bsize, -1)).view(bsize, h * w, h * w) + sp_map = torch.matmul(sp_map, x_bottom) + sp_map = sp_map.reshape(bsize, self.in_channels, h, w) + x = x + sp_map + x = self.out_conv(x) + return x + + +class MFAB(nn.Module): + def __init__( + self, in_channels, skip_channels, out_channels, use_batchnorm=True, reduction=16 + ): + # MFAB is just a modified version of SE-blocks, one for skip, one for input + super(MFAB, self).__init__() + self.hl_conv = nn.Sequential( + md.Conv2dReLU( + in_channels, + in_channels, + kernel_size=3, + padding=1, + 
use_batchnorm=use_batchnorm, + ), + md.Conv2dReLU( + in_channels, skip_channels, kernel_size=1, use_batchnorm=use_batchnorm + ), + ) + reduced_channels = max(1, skip_channels // reduction) + self.SE_ll = nn.Sequential( + nn.AdaptiveAvgPool2d(1), + nn.Conv2d(skip_channels, reduced_channels, 1), + nn.ReLU(inplace=True), + nn.Conv2d(reduced_channels, skip_channels, 1), + nn.Sigmoid(), + ) + self.SE_hl = nn.Sequential( + nn.AdaptiveAvgPool2d(1), + nn.Conv2d(skip_channels, reduced_channels, 1), + nn.ReLU(inplace=True), + nn.Conv2d(reduced_channels, skip_channels, 1), + nn.Sigmoid(), + ) + self.conv1 = md.Conv2dReLU( + skip_channels + + skip_channels, # we transform C-prime form high level to C from skip connection + out_channels, + kernel_size=3, + padding=1, + use_batchnorm=use_batchnorm, + ) + self.conv2 = md.Conv2dReLU( + out_channels, + out_channels, + kernel_size=3, + padding=1, + use_batchnorm=use_batchnorm, + ) + + def forward(self, x, skip=None): + x = self.hl_conv(x) + x = F.interpolate(x, scale_factor=2, mode="nearest") + attention_hl = self.SE_hl(x) + if skip is not None: + attention_ll = self.SE_ll(skip) + attention_hl = attention_hl + attention_ll + x = x * attention_hl + x = torch.cat([x, skip], dim=1) + x = self.conv1(x) + x = self.conv2(x) + return x + + +class DecoderBlock(nn.Module): + def __init__(self, in_channels, skip_channels, out_channels, use_batchnorm=True): + super().__init__() + self.conv1 = md.Conv2dReLU( + in_channels + skip_channels, + out_channels, + kernel_size=3, + padding=1, + use_batchnorm=use_batchnorm, + ) + self.conv2 = md.Conv2dReLU( + out_channels, + out_channels, + kernel_size=3, + padding=1, + use_batchnorm=use_batchnorm, + ) + + def forward(self, x, skip=None): + x = F.interpolate(x, scale_factor=2, mode="nearest") + if skip is not None: + x = torch.cat([x, skip], dim=1) + x = self.conv1(x) + x = self.conv2(x) + return x + + +class MAnetDecoder(nn.Module): + def __init__( + self, + encoder_channels, + decoder_channels, + n_blocks=5, + reduction=16, + use_batchnorm=True, + pab_channels=64, + ): + super().__init__() + + if n_blocks != len(decoder_channels): + raise ValueError( + "Model depth is {}, but you provide `decoder_channels` for {} blocks.".format( + n_blocks, len(decoder_channels) + ) + ) + + # remove first skip with same spatial resolution + encoder_channels = encoder_channels[1:] + + # reverse channels to start from head of encoder + encoder_channels = encoder_channels[::-1] + + # computing blocks input and output channels + head_channels = encoder_channels[0] + in_channels = [head_channels] + list(decoder_channels[:-1]) + skip_channels = list(encoder_channels[1:]) + [0] + out_channels = decoder_channels + + self.center = PAB(head_channels, head_channels, pab_channels=pab_channels) + + # combine decoder keyword arguments + kwargs = dict(use_batchnorm=use_batchnorm) # no attention type here + blocks = [ + MFAB(in_ch, skip_ch, out_ch, reduction=reduction, **kwargs) + if skip_ch > 0 + else DecoderBlock(in_ch, skip_ch, out_ch, **kwargs) + for in_ch, skip_ch, out_ch in zip(in_channels, skip_channels, out_channels) + ] + # for the last we dont have skip connection -> use simple decoder block + self.blocks = nn.ModuleList(blocks) + + def forward(self, *features): + features = features[1:] # remove first skip with same spatial resolution + features = features[::-1] # reverse channels to start from head of encoder + + head = features[0] + skips = features[1:] + + x = self.center(head) + for i, decoder_block in enumerate(self.blocks): + skip = skips[i] 
if i < len(skips) else None + x = decoder_block(x, skip) + + return x diff --git a/feature_extractor_models/decoders/manet/model.py b/feature_extractor_models/decoders/manet/model.py new file mode 100644 index 0000000000000000000000000000000000000000..a2ecac92af47734c23678f4993f7bd6f043ca0f4 --- /dev/null +++ b/feature_extractor_models/decoders/manet/model.py @@ -0,0 +1,102 @@ +from typing import Optional, Union, List + +from feature_extractor_models.encoders import get_encoder +from feature_extractor_models.base import ( + SegmentationModel, + SegmentationHead, + ClassificationHead, +) +from .decoder import MAnetDecoder + + +class MAnet(SegmentationModel): + """MAnet_ : Multi-scale Attention Net. The MA-Net can capture rich contextual dependencies based on + the attention mechanism, using two blocks: + - Position-wise Attention Block (PAB), which captures the spatial dependencies between pixels in a global view + - Multi-scale Fusion Attention Block (MFAB), which captures the channel dependencies between any feature map by + multi-scale semantic feature fusion + + Args: + encoder_name: Name of the classification model that will be used as an encoder (a.k.a backbone) + to extract features of different spatial resolution + encoder_depth: A number of stages used in encoder in range [3, 5]. Each stage generate features + two times smaller in spatial dimensions than previous one (e.g. for depth 0 we will have features + with shapes [(N, C, H, W),], for depth 1 - [(N, C, H, W), (N, C, H // 2, W // 2)] and so on). + Default is 5 + encoder_weights: One of **None** (random initialization), **"imagenet"** (pre-training on ImageNet) and + other pretrained weights (see table with available weights for each encoder_name) + decoder_channels: List of integers which specify **in_channels** parameter for convolutions used in decoder. + Length of the list should be the same as **encoder_depth** + decoder_use_batchnorm: If **True**, BatchNorm2d layer between Conv2D and Activation layers + is used. If **"inplace"** InplaceABN will be used, allows to decrease memory consumption. + Available options are **True, False, "inplace"** + decoder_pab_channels: A number of channels for PAB module in decoder. + Default is 64. + in_channels: A number of input channels for the model, default is 3 (RGB images) + classes: A number of classes for output mask (or you can think as a number of channels of output mask) + activation: An activation function to apply after the final convolution layer. + Available options are **"sigmoid"**, **"softmax"**, **"logsoftmax"**, **"tanh"**, **"identity"**, + **callable** and **None**. + Default is **None** + aux_params: Dictionary with parameters of the auxiliary output (classification head). Auxiliary output is build + on top of encoder if **aux_params** is not **None** (default). Supported params: + - classes (int): A number of classes + - pooling (str): One of "max", "avg". Default is "avg" + - dropout (float): Dropout factor in [0, 1) + - activation (str): An activation function to apply "sigmoid"/"softmax" + (could be **None** to return logits) + + Returns: + ``torch.nn.Module``: **MAnet** + + .. 
_MAnet: + https://ieeexplore.ieee.org/abstract/document/9201310 + + """ + + def __init__( + self, + encoder_name: str = "resnet34", + encoder_depth: int = 5, + encoder_weights: Optional[str] = "imagenet", + decoder_use_batchnorm: bool = True, + decoder_channels: List[int] = (256, 128, 64, 32, 16), + decoder_pab_channels: int = 64, + in_channels: int = 3, + classes: int = 1, + activation: Optional[Union[str, callable]] = None, + aux_params: Optional[dict] = None, + ): + super().__init__() + + self.encoder = get_encoder( + encoder_name, + in_channels=in_channels, + depth=encoder_depth, + weights=encoder_weights, + ) + + self.decoder = MAnetDecoder( + encoder_channels=self.encoder.out_channels, + decoder_channels=decoder_channels, + n_blocks=encoder_depth, + use_batchnorm=decoder_use_batchnorm, + pab_channels=decoder_pab_channels, + ) + + self.segmentation_head = SegmentationHead( + in_channels=decoder_channels[-1], + out_channels=classes, + activation=activation, + kernel_size=3, + ) + + if aux_params is not None: + self.classification_head = ClassificationHead( + in_channels=self.encoder.out_channels[-1], **aux_params + ) + else: + self.classification_head = None + + self.name = "manet-{}".format(encoder_name) + self.initialize() diff --git a/feature_extractor_models/decoders/pan/__init__.py b/feature_extractor_models/decoders/pan/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8710438531d7567b66d486d5537397f6f0ba1253 --- /dev/null +++ b/feature_extractor_models/decoders/pan/__init__.py @@ -0,0 +1,3 @@ +from .model import PAN + +__all__ = ["PAN"] diff --git a/feature_extractor_models/decoders/pan/decoder.py b/feature_extractor_models/decoders/pan/decoder.py new file mode 100644 index 0000000000000000000000000000000000000000..ab8f8675a1b5dc6ed3447aef63caeb7d96331529 --- /dev/null +++ b/feature_extractor_models/decoders/pan/decoder.py @@ -0,0 +1,208 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class ConvBnRelu(nn.Module): + def __init__( + self, + in_channels: int, + out_channels: int, + kernel_size: int, + stride: int = 1, + padding: int = 0, + dilation: int = 1, + groups: int = 1, + bias: bool = True, + add_relu: bool = True, + interpolate: bool = False, + ): + super(ConvBnRelu, self).__init__() + self.conv = nn.Conv2d( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + bias=bias, + groups=groups, + ) + self.add_relu = add_relu + self.interpolate = interpolate + self.bn = nn.BatchNorm2d(out_channels) + self.activation = nn.ReLU(inplace=True) + + def forward(self, x): + x = self.conv(x) + x = self.bn(x) + if self.add_relu: + x = self.activation(x) + if self.interpolate: + x = F.interpolate(x, scale_factor=2, mode="bilinear", align_corners=True) + return x + + +class FPABlock(nn.Module): + def __init__(self, in_channels, out_channels, upscale_mode="bilinear"): + super(FPABlock, self).__init__() + + self.upscale_mode = upscale_mode + if self.upscale_mode == "bilinear": + self.align_corners = True + else: + self.align_corners = False + + # global pooling branch + self.branch1 = nn.Sequential( + nn.AdaptiveAvgPool2d(1), + ConvBnRelu( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1, + stride=1, + padding=0, + ), + ) + + # midddle branch + self.mid = nn.Sequential( + ConvBnRelu( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1, + stride=1, + padding=0, + ) + ) + self.down1 = nn.Sequential( + 
nn.MaxPool2d(kernel_size=2, stride=2), + ConvBnRelu( + in_channels=in_channels, + out_channels=1, + kernel_size=7, + stride=1, + padding=3, + ), + ) + self.down2 = nn.Sequential( + nn.MaxPool2d(kernel_size=2, stride=2), + ConvBnRelu( + in_channels=1, out_channels=1, kernel_size=5, stride=1, padding=2 + ), + ) + self.down3 = nn.Sequential( + nn.MaxPool2d(kernel_size=2, stride=2), + ConvBnRelu( + in_channels=1, out_channels=1, kernel_size=3, stride=1, padding=1 + ), + ConvBnRelu( + in_channels=1, out_channels=1, kernel_size=3, stride=1, padding=1 + ), + ) + self.conv2 = ConvBnRelu( + in_channels=1, out_channels=1, kernel_size=5, stride=1, padding=2 + ) + self.conv1 = ConvBnRelu( + in_channels=1, out_channels=1, kernel_size=7, stride=1, padding=3 + ) + + def forward(self, x): + h, w = x.size(2), x.size(3) + b1 = self.branch1(x) + upscale_parameters = dict( + mode=self.upscale_mode, align_corners=self.align_corners + ) + b1 = F.interpolate(b1, size=(h, w), **upscale_parameters) + + mid = self.mid(x) + x1 = self.down1(x) + x2 = self.down2(x1) + x3 = self.down3(x2) + x3 = F.interpolate(x3, size=(h // 4, w // 4), **upscale_parameters) + + x2 = self.conv2(x2) + x = x2 + x3 + x = F.interpolate(x, size=(h // 2, w // 2), **upscale_parameters) + + x1 = self.conv1(x1) + x = x + x1 + x = F.interpolate(x, size=(h, w), **upscale_parameters) + + x = torch.mul(x, mid) + x = x + b1 + return x + + +class GAUBlock(nn.Module): + def __init__( + self, in_channels: int, out_channels: int, upscale_mode: str = "bilinear" + ): + super(GAUBlock, self).__init__() + + self.upscale_mode = upscale_mode + self.align_corners = True if upscale_mode == "bilinear" else None + + self.conv1 = nn.Sequential( + nn.AdaptiveAvgPool2d(1), + ConvBnRelu( + in_channels=out_channels, + out_channels=out_channels, + kernel_size=1, + add_relu=False, + ), + nn.Sigmoid(), + ) + self.conv2 = ConvBnRelu( + in_channels=in_channels, out_channels=out_channels, kernel_size=3, padding=1 + ) + + def forward(self, x, y): + """ + Args: + x: low level feature + y: high level feature + """ + h, w = x.size(2), x.size(3) + y_up = F.interpolate( + y, size=(h, w), mode=self.upscale_mode, align_corners=self.align_corners + ) + x = self.conv2(x) + y = self.conv1(y) + z = torch.mul(x, y) + return y_up + z + + +class PANDecoder(nn.Module): + def __init__( + self, encoder_channels, decoder_channels, upscale_mode: str = "bilinear" + ): + super().__init__() + + self.fpa = FPABlock( + in_channels=encoder_channels[-1], out_channels=decoder_channels + ) + self.gau3 = GAUBlock( + in_channels=encoder_channels[-2], + out_channels=decoder_channels, + upscale_mode=upscale_mode, + ) + self.gau2 = GAUBlock( + in_channels=encoder_channels[-3], + out_channels=decoder_channels, + upscale_mode=upscale_mode, + ) + self.gau1 = GAUBlock( + in_channels=encoder_channels[-4], + out_channels=decoder_channels, + upscale_mode=upscale_mode, + ) + + def forward(self, *features): + bottleneck = features[-1] + x5 = self.fpa(bottleneck) # 1/32 + x4 = self.gau3(features[-2], x5) # 1/16 + x3 = self.gau2(features[-3], x4) # 1/8 + x2 = self.gau1(features[-4], x3) # 1/4 + + return x2 diff --git a/feature_extractor_models/decoders/pan/model.py b/feature_extractor_models/decoders/pan/model.py new file mode 100644 index 0000000000000000000000000000000000000000..ec3e1ad033eb9850e83e4024f1894a0b6434ad7a --- /dev/null +++ b/feature_extractor_models/decoders/pan/model.py @@ -0,0 +1,100 @@ +from typing import Optional, Union + +from feature_extractor_models.encoders import get_encoder +from 
feature_extractor_models.base import ( + SegmentationModel, + SegmentationHead, + ClassificationHead, +) +from .decoder import PANDecoder + + +class PAN(SegmentationModel): + """Implementation of PAN_ (Pyramid Attention Network). + + Note: + Currently works with shape of input tensor >= [B x C x 128 x 128] for pytorch <= 1.1.0 + and with shape of input tensor >= [B x C x 256 x 256] for pytorch == 1.3.1 + + Args: + encoder_name: Name of the classification model that will be used as an encoder (a.k.a backbone) + to extract features of different spatial resolution + encoder_weights: One of **None** (random initialization), **"imagenet"** (pre-training on ImageNet) and + other pretrained weights (see table with available weights for each encoder_name) + encoder_output_stride: 16 or 32, if 16 use dilation in encoder last layer. + Doesn't work with ***ception***, **vgg***, **densenet*`** backbones.Default is 16. + decoder_channels: A number of convolution layer filters in decoder blocks + in_channels: A number of input channels for the model, default is 3 (RGB images) + classes: A number of classes for output mask (or you can think as a number of channels of output mask) + activation: An activation function to apply after the final convolution layer. + Available options are **"sigmoid"**, **"softmax"**, **"logsoftmax"**, **"tanh"**, **"identity"**, + **callable** and **None**. + Default is **None** + upsampling: Final upsampling factor. Default is 4 to preserve input-output spatial shape identity + aux_params: Dictionary with parameters of the auxiliary output (classification head). Auxiliary output is build + on top of encoder if **aux_params** is not **None** (default). Supported params: + - classes (int): A number of classes + - pooling (str): One of "max", "avg". Default is "avg" + - dropout (float): Dropout factor in [0, 1) + - activation (str): An activation function to apply "sigmoid"/"softmax" + (could be **None** to return logits) + + Returns: + ``torch.nn.Module``: **PAN** + + .. 
_PAN: + https://arxiv.org/abs/1805.10180 + + """ + + def __init__( + self, + encoder_name: str = "resnet34", + encoder_weights: Optional[str] = "imagenet", + encoder_output_stride: int = 16, + decoder_channels: int = 32, + in_channels: int = 3, + classes: int = 1, + activation: Optional[Union[str, callable]] = None, + upsampling: int = 4, + aux_params: Optional[dict] = None, + ): + super().__init__() + + if encoder_output_stride not in [16, 32]: + raise ValueError( + "PAN support output stride 16 or 32, got {}".format( + encoder_output_stride + ) + ) + + self.encoder = get_encoder( + encoder_name, + in_channels=in_channels, + depth=5, + weights=encoder_weights, + output_stride=encoder_output_stride, + ) + + self.decoder = PANDecoder( + encoder_channels=self.encoder.out_channels, + decoder_channels=decoder_channels, + ) + + self.segmentation_head = SegmentationHead( + in_channels=decoder_channels, + out_channels=classes, + activation=activation, + kernel_size=3, + upsampling=upsampling, + ) + + if aux_params is not None: + self.classification_head = ClassificationHead( + in_channels=self.encoder.out_channels[-1], **aux_params + ) + else: + self.classification_head = None + + self.name = "pan-{}".format(encoder_name) + self.initialize() diff --git a/feature_extractor_models/decoders/pspnet/__init__.py b/feature_extractor_models/decoders/pspnet/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..7b8922eb35c50c4cedadd987a402189d3721bb65 --- /dev/null +++ b/feature_extractor_models/decoders/pspnet/__init__.py @@ -0,0 +1,3 @@ +from .model import PSPNet + +__all__ = ["PSPNet"] diff --git a/feature_extractor_models/decoders/pspnet/decoder.py b/feature_extractor_models/decoders/pspnet/decoder.py new file mode 100644 index 0000000000000000000000000000000000000000..eda408400512b97b78604640bc6be1801fdf5fa8 --- /dev/null +++ b/feature_extractor_models/decoders/pspnet/decoder.py @@ -0,0 +1,76 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +from feature_extractor_models.base import modules + + +class PSPBlock(nn.Module): + def __init__(self, in_channels, out_channels, pool_size, use_bathcnorm=True): + super().__init__() + if pool_size == 1: + use_bathcnorm = False # PyTorch does not support BatchNorm for 1x1 shape + self.pool = nn.Sequential( + nn.AdaptiveAvgPool2d(output_size=(pool_size, pool_size)), + modules.Conv2dReLU( + in_channels, out_channels, (1, 1), use_batchnorm=use_bathcnorm + ), + ) + + def forward(self, x): + h, w = x.size(2), x.size(3) + x = self.pool(x) + x = F.interpolate(x, size=(h, w), mode="bilinear", align_corners=True) + return x + + +class PSPModule(nn.Module): + def __init__(self, in_channels, sizes=(1, 2, 3, 6), use_bathcnorm=True): + super().__init__() + + self.blocks = nn.ModuleList( + [ + PSPBlock( + in_channels, + in_channels // len(sizes), + size, + use_bathcnorm=use_bathcnorm, + ) + for size in sizes + ] + ) + + def forward(self, x): + xs = [block(x) for block in self.blocks] + [x] + x = torch.cat(xs, dim=1) + return x + + +class PSPDecoder(nn.Module): + def __init__( + self, encoder_channels, use_batchnorm=True, out_channels=512, dropout=0.2 + ): + super().__init__() + + self.psp = PSPModule( + in_channels=encoder_channels[-1], + sizes=(1, 2, 3, 6), + use_bathcnorm=use_batchnorm, + ) + + self.conv = modules.Conv2dReLU( + in_channels=encoder_channels[-1] * 2, + out_channels=out_channels, + kernel_size=1, + use_batchnorm=use_batchnorm, + ) + + self.dropout = nn.Dropout2d(p=dropout) + + def forward(self, *features): + x 
= features[-1] + x = self.psp(x) + x = self.conv(x) + x = self.dropout(x) + + return x diff --git a/feature_extractor_models/decoders/pspnet/model.py b/feature_extractor_models/decoders/pspnet/model.py new file mode 100644 index 0000000000000000000000000000000000000000..1c5f39a53823c717d16cdf4ce77c9095d0d69af3 --- /dev/null +++ b/feature_extractor_models/decoders/pspnet/model.py @@ -0,0 +1,101 @@ +from typing import Optional, Union + +from feature_extractor_models.encoders import get_encoder +from feature_extractor_models.base import ( + SegmentationModel, + SegmentationHead, + ClassificationHead, +) +from .decoder import PSPDecoder + + +class PSPNet(SegmentationModel): + """PSPNet_ is a fully convolution neural network for image semantic segmentation. Consist of + *encoder* and *Spatial Pyramid* (decoder). Spatial Pyramid build on top of encoder and does not + use "fine-features" (features of high spatial resolution). PSPNet can be used for multiclass segmentation + of high resolution images, however it is not good for detecting small objects and producing accurate, + pixel-level mask. + + Args: + encoder_name: Name of the classification model that will be used as an encoder (a.k.a backbone) + to extract features of different spatial resolution + encoder_depth: A number of stages used in encoder in range [3, 5]. Each stage generate features + two times smaller in spatial dimensions than previous one (e.g. for depth 0 we will have features + with shapes [(N, C, H, W),], for depth 1 - [(N, C, H, W), (N, C, H // 2, W // 2)] and so on). + Default is 5 + encoder_weights: One of **None** (random initialization), **"imagenet"** (pre-training on ImageNet) and + other pretrained weights (see table with available weights for each encoder_name) + psp_out_channels: A number of filters in Spatial Pyramid + psp_use_batchnorm: If **True**, BatchNorm2d layer between Conv2D and Activation layers + is used. If **"inplace"** InplaceABN will be used, allows to decrease memory consumption. + Available options are **True, False, "inplace"** + psp_dropout: Spatial dropout rate in [0, 1) used in Spatial Pyramid + in_channels: A number of input channels for the model, default is 3 (RGB images) + classes: A number of classes for output mask (or you can think as a number of channels of output mask) + activation: An activation function to apply after the final convolution layer. + Available options are **"sigmoid"**, **"softmax"**, **"logsoftmax"**, **"tanh"**, **"identity"**, + **callable** and **None**. + Default is **None** + upsampling: Final upsampling factor. Default is 8 to preserve input-output spatial shape identity + aux_params: Dictionary with parameters of the auxiliary output (classification head). Auxiliary output is build + on top of encoder if **aux_params** is not **None** (default). Supported params: + - classes (int): A number of classes + - pooling (str): One of "max", "avg". Default is "avg" + - dropout (float): Dropout factor in [0, 1) + - activation (str): An activation function to apply "sigmoid"/"softmax" + (could be **None** to return logits) + + Returns: + ``torch.nn.Module``: **PSPNet** + + .. 
_PSPNet: + https://arxiv.org/abs/1612.01105 + """ + + def __init__( + self, + encoder_name: str = "resnet34", + encoder_weights: Optional[str] = "imagenet", + encoder_depth: int = 3, + psp_out_channels: int = 512, + psp_use_batchnorm: bool = True, + psp_dropout: float = 0.2, + in_channels: int = 3, + classes: int = 1, + activation: Optional[Union[str, callable]] = None, + upsampling: int = 8, + aux_params: Optional[dict] = None, + ): + super().__init__() + + self.encoder = get_encoder( + encoder_name, + in_channels=in_channels, + depth=encoder_depth, + weights=encoder_weights, + ) + + self.decoder = PSPDecoder( + encoder_channels=self.encoder.out_channels, + use_batchnorm=psp_use_batchnorm, + out_channels=psp_out_channels, + dropout=psp_dropout, + ) + + self.segmentation_head = SegmentationHead( + in_channels=psp_out_channels, + out_channels=classes, + kernel_size=3, + activation=activation, + upsampling=upsampling, + ) + + if aux_params: + self.classification_head = ClassificationHead( + in_channels=self.encoder.out_channels[-1], **aux_params + ) + else: + self.classification_head = None + + self.name = "psp-{}".format(encoder_name) + self.initialize() diff --git a/feature_extractor_models/decoders/unet/__init__.py b/feature_extractor_models/decoders/unet/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..bcae4e0fbb8647fcfe9f2bd481964bbbe9ef0290 --- /dev/null +++ b/feature_extractor_models/decoders/unet/__init__.py @@ -0,0 +1,3 @@ +from .model import Unet + +__all__ = ["Unet"] diff --git a/feature_extractor_models/decoders/unet/decoder.py b/feature_extractor_models/decoders/unet/decoder.py new file mode 100644 index 0000000000000000000000000000000000000000..313b208415095e3c9635b44f989958acb4a4db30 --- /dev/null +++ b/feature_extractor_models/decoders/unet/decoder.py @@ -0,0 +1,124 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +from feature_extractor_models.base import modules as md + + +class DecoderBlock(nn.Module): + def __init__( + self, + in_channels, + skip_channels, + out_channels, + use_batchnorm=True, + attention_type=None, + ): + super().__init__() + self.conv1 = md.Conv2dReLU( + in_channels + skip_channels, + out_channels, + kernel_size=3, + padding=1, + use_batchnorm=use_batchnorm, + ) + self.attention1 = md.Attention( + attention_type, in_channels=in_channels + skip_channels + ) + self.conv2 = md.Conv2dReLU( + out_channels, + out_channels, + kernel_size=3, + padding=1, + use_batchnorm=use_batchnorm, + ) + self.attention2 = md.Attention(attention_type, in_channels=out_channels) + + def forward(self, x, skip=None): + x = F.interpolate(x, scale_factor=2, mode="nearest") + if skip is not None: + x = torch.cat([x, skip], dim=1) + x = self.attention1(x) + x = self.conv1(x) + x = self.conv2(x) + x = self.attention2(x) + return x + + +class CenterBlock(nn.Sequential): + def __init__(self, in_channels, out_channels, use_batchnorm=True): + conv1 = md.Conv2dReLU( + in_channels, + out_channels, + kernel_size=3, + padding=1, + use_batchnorm=use_batchnorm, + ) + conv2 = md.Conv2dReLU( + out_channels, + out_channels, + kernel_size=3, + padding=1, + use_batchnorm=use_batchnorm, + ) + super().__init__(conv1, conv2) + + +class UnetDecoder(nn.Module): + def __init__( + self, + encoder_channels, + decoder_channels, + n_blocks=5, + use_batchnorm=True, + attention_type=None, + center=False, + ): + super().__init__() + + if n_blocks != len(decoder_channels): + raise ValueError( + "Model depth is {}, but you provide `decoder_channels` for {} 
blocks.".format( + n_blocks, len(decoder_channels) + ) + ) + + # remove first skip with same spatial resolution + encoder_channels = encoder_channels[1:] + # reverse channels to start from head of encoder + encoder_channels = encoder_channels[::-1] + + # computing blocks input and output channels + head_channels = encoder_channels[0] + in_channels = [head_channels] + list(decoder_channels[:-1]) + skip_channels = list(encoder_channels[1:]) + [0] + out_channels = decoder_channels + + if center: + self.center = CenterBlock( + head_channels, head_channels, use_batchnorm=use_batchnorm + ) + else: + self.center = nn.Identity() + + # combine decoder keyword arguments + kwargs = dict(use_batchnorm=use_batchnorm, attention_type=attention_type) + blocks = [ + DecoderBlock(in_ch, skip_ch, out_ch, **kwargs) + for in_ch, skip_ch, out_ch in zip(in_channels, skip_channels, out_channels) + ] + self.blocks = nn.ModuleList(blocks) + + def forward(self, *features): + features = features[1:] # remove first skip with same spatial resolution + features = features[::-1] # reverse channels to start from head of encoder + + head = features[0] + skips = features[1:] + + x = self.center(head) + for i, decoder_block in enumerate(self.blocks): + skip = skips[i] if i < len(skips) else None + x = decoder_block(x, skip) + + return x diff --git a/feature_extractor_models/decoders/unet/model.py b/feature_extractor_models/decoders/unet/model.py new file mode 100644 index 0000000000000000000000000000000000000000..228064ef4560342986df0d6cc60df53e379bd56d --- /dev/null +++ b/feature_extractor_models/decoders/unet/model.py @@ -0,0 +1,102 @@ +from typing import Optional, Union, List + +from feature_extractor_models.encoders import get_encoder +from feature_extractor_models.base import ( + SegmentationModel, + SegmentationHead, + ClassificationHead, +) +from .decoder import UnetDecoder + + +class Unet(SegmentationModel): + """Unet_ is a fully convolution neural network for image semantic segmentation. Consist of *encoder* + and *decoder* parts connected with *skip connections*. Encoder extract features of different spatial + resolution (skip connections) which are used by decoder to define accurate segmentation mask. Use *concatenation* + for fusing decoder blocks with skip connections. + + Args: + encoder_name: Name of the classification model that will be used as an encoder (a.k.a backbone) + to extract features of different spatial resolution + encoder_depth: A number of stages used in encoder in range [3, 5]. Each stage generate features + two times smaller in spatial dimensions than previous one (e.g. for depth 0 we will have features + with shapes [(N, C, H, W),], for depth 1 - [(N, C, H, W), (N, C, H // 2, W // 2)] and so on). + Default is 5 + encoder_weights: One of **None** (random initialization), **"imagenet"** (pre-training on ImageNet) and + other pretrained weights (see table with available weights for each encoder_name) + decoder_channels: List of integers which specify **in_channels** parameter for convolutions used in decoder. + Length of the list should be the same as **encoder_depth** + decoder_use_batchnorm: If **True**, BatchNorm2d layer between Conv2D and Activation layers + is used. If **"inplace"** InplaceABN will be used, allows to decrease memory consumption. + Available options are **True, False, "inplace"** + decoder_attention_type: Attention module used in decoder of the model. Available options are + **None** and **scse** (https://arxiv.org/abs/1808.08127). 
+ in_channels: A number of input channels for the model, default is 3 (RGB images) + classes: A number of classes for output mask (or you can think as a number of channels of output mask) + activation: An activation function to apply after the final convolution layer. + Available options are **"sigmoid"**, **"softmax"**, **"logsoftmax"**, **"tanh"**, **"identity"**, + **callable** and **None**. + Default is **None** + aux_params: Dictionary with parameters of the auxiliary output (classification head). Auxiliary output is build + on top of encoder if **aux_params** is not **None** (default). Supported params: + - classes (int): A number of classes + - pooling (str): One of "max", "avg". Default is "avg" + - dropout (float): Dropout factor in [0, 1) + - activation (str): An activation function to apply "sigmoid"/"softmax" + (could be **None** to return logits) + + Returns: + ``torch.nn.Module``: Unet + + .. _Unet: + https://arxiv.org/abs/1505.04597 + + """ + + def __init__( + self, + encoder_name: str = "resnet34", + encoder_depth: int = 5, + encoder_weights: Optional[str] = "imagenet", + decoder_use_batchnorm: bool = True, + decoder_channels: List[int] = (256, 128, 64, 32, 16), + decoder_attention_type: Optional[str] = None, + in_channels: int = 3, + classes: int = 1, + activation: Optional[Union[str, callable]] = None, + aux_params: Optional[dict] = None, + ): + super().__init__() + + self.encoder = get_encoder( + encoder_name, + in_channels=in_channels, + depth=encoder_depth, + weights=encoder_weights, + ) + + self.decoder = UnetDecoder( + encoder_channels=self.encoder.out_channels, + decoder_channels=decoder_channels, + n_blocks=encoder_depth, + use_batchnorm=decoder_use_batchnorm, + center=True if encoder_name.startswith("vgg") else False, + attention_type=decoder_attention_type, + ) + + self.segmentation_head = SegmentationHead( + in_channels=decoder_channels[-1], + out_channels=classes, + activation=activation, + kernel_size=3, + ) + + if aux_params is not None: + self.classification_head = ClassificationHead( + in_channels=self.encoder.out_channels[-1], **aux_params + ) + else: + self.classification_head = None + + self.name = "u-{}".format(encoder_name) + self.initialize() diff --git a/feature_extractor_models/decoders/unetplusplus/__init__.py b/feature_extractor_models/decoders/unetplusplus/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..fd30de4abe176233aaed9daa57be792aaee00293 --- /dev/null +++ b/feature_extractor_models/decoders/unetplusplus/__init__.py @@ -0,0 +1,3 @@ +from .model import UnetPlusPlus + +__all__ = ["UnetPlusPlus"] diff --git a/feature_extractor_models/decoders/unetplusplus/decoder.py b/feature_extractor_models/decoders/unetplusplus/decoder.py new file mode 100644 index 0000000000000000000000000000000000000000..551b48a63a8f24fd25cae4b7cacaceb89011862f --- /dev/null +++ b/feature_extractor_models/decoders/unetplusplus/decoder.py @@ -0,0 +1,155 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +from feature_extractor_models.base import modules as md + + +class DecoderBlock(nn.Module): + def __init__( + self, + in_channels, + skip_channels, + out_channels, + use_batchnorm=True, + attention_type=None, + ): + super().__init__() + self.conv1 = md.Conv2dReLU( + in_channels + skip_channels, + out_channels, + kernel_size=3, + padding=1, + use_batchnorm=use_batchnorm, + ) + self.attention1 = md.Attention( + attention_type, in_channels=in_channels + skip_channels + ) + self.conv2 = md.Conv2dReLU( + out_channels, + 
out_channels, + kernel_size=3, + padding=1, + use_batchnorm=use_batchnorm, + ) + self.attention2 = md.Attention(attention_type, in_channels=out_channels) + + def forward(self, x, skip=None): + x = F.interpolate(x, scale_factor=2, mode="nearest") + if skip is not None: + x = torch.cat([x, skip], dim=1) + x = self.attention1(x) + x = self.conv1(x) + x = self.conv2(x) + x = self.attention2(x) + return x + + +class CenterBlock(nn.Sequential): + def __init__(self, in_channels, out_channels, use_batchnorm=True): + conv1 = md.Conv2dReLU( + in_channels, + out_channels, + kernel_size=3, + padding=1, + use_batchnorm=use_batchnorm, + ) + conv2 = md.Conv2dReLU( + out_channels, + out_channels, + kernel_size=3, + padding=1, + use_batchnorm=use_batchnorm, + ) + super().__init__(conv1, conv2) + + +class UnetPlusPlusDecoder(nn.Module): + def __init__( + self, + encoder_channels, + decoder_channels, + n_blocks=5, + use_batchnorm=True, + attention_type=None, + center=False, + ): + super().__init__() + + if n_blocks != len(decoder_channels): + raise ValueError( + "Model depth is {}, but you provide `decoder_channels` for {} blocks.".format( + n_blocks, len(decoder_channels) + ) + ) + + # remove first skip with same spatial resolution + encoder_channels = encoder_channels[1:] + # reverse channels to start from head of encoder + encoder_channels = encoder_channels[::-1] + + # computing blocks input and output channels + head_channels = encoder_channels[0] + self.in_channels = [head_channels] + list(decoder_channels[:-1]) + self.skip_channels = list(encoder_channels[1:]) + [0] + self.out_channels = decoder_channels + if center: + self.center = CenterBlock( + head_channels, head_channels, use_batchnorm=use_batchnorm + ) + else: + self.center = nn.Identity() + + # combine decoder keyword arguments + kwargs = dict(use_batchnorm=use_batchnorm, attention_type=attention_type) + + blocks = {} + for layer_idx in range(len(self.in_channels) - 1): + for depth_idx in range(layer_idx + 1): + if depth_idx == 0: + in_ch = self.in_channels[layer_idx] + skip_ch = self.skip_channels[layer_idx] * (layer_idx + 1) + out_ch = self.out_channels[layer_idx] + else: + out_ch = self.skip_channels[layer_idx] + skip_ch = self.skip_channels[layer_idx] * ( + layer_idx + 1 - depth_idx + ) + in_ch = self.skip_channels[layer_idx - 1] + blocks[f"x_{depth_idx}_{layer_idx}"] = DecoderBlock( + in_ch, skip_ch, out_ch, **kwargs + ) + blocks[f"x_{0}_{len(self.in_channels)-1}"] = DecoderBlock( + self.in_channels[-1], 0, self.out_channels[-1], **kwargs + ) + self.blocks = nn.ModuleDict(blocks) + self.depth = len(self.in_channels) - 1 + + def forward(self, *features): + features = features[1:] # remove first skip with same spatial resolution + features = features[::-1] # reverse channels to start from head of encoder + # start building dense connections + dense_x = {} + for layer_idx in range(len(self.in_channels) - 1): + for depth_idx in range(self.depth - layer_idx): + if layer_idx == 0: + output = self.blocks[f"x_{depth_idx}_{depth_idx}"]( + features[depth_idx], features[depth_idx + 1] + ) + dense_x[f"x_{depth_idx}_{depth_idx}"] = output + else: + dense_l_i = depth_idx + layer_idx + cat_features = [ + dense_x[f"x_{idx}_{dense_l_i}"] + for idx in range(depth_idx + 1, dense_l_i + 1) + ] + cat_features = torch.cat( + cat_features + [features[dense_l_i + 1]], dim=1 + ) + dense_x[f"x_{depth_idx}_{dense_l_i}"] = self.blocks[ + f"x_{depth_idx}_{dense_l_i}" + ](dense_x[f"x_{depth_idx}_{dense_l_i-1}"], cat_features) + dense_x[f"x_{0}_{self.depth}"] = 
self.blocks[f"x_{0}_{self.depth}"]( + dense_x[f"x_{0}_{self.depth-1}"] + ) + return dense_x[f"x_{0}_{self.depth}"] diff --git a/feature_extractor_models/decoders/unetplusplus/model.py b/feature_extractor_models/decoders/unetplusplus/model.py new file mode 100644 index 0000000000000000000000000000000000000000..319881342838454c7df18f6844bdb903e8ffc9e6 --- /dev/null +++ b/feature_extractor_models/decoders/unetplusplus/model.py @@ -0,0 +1,107 @@ +from typing import Optional, Union, List + +from feature_extractor_models.encoders import get_encoder +from feature_extractor_models.base import ( + SegmentationModel, + SegmentationHead, + ClassificationHead, +) +from .decoder import UnetPlusPlusDecoder + + +class UnetPlusPlus(SegmentationModel): + """Unet++ is a fully convolution neural network for image semantic segmentation. Consist of *encoder* + and *decoder* parts connected with *skip connections*. Encoder extract features of different spatial + resolution (skip connections) which are used by decoder to define accurate segmentation mask. Decoder of + Unet++ is more complex than in usual Unet. + + Args: + encoder_name: Name of the classification model that will be used as an encoder (a.k.a backbone) + to extract features of different spatial resolution + encoder_depth: A number of stages used in encoder in range [3, 5]. Each stage generate features + two times smaller in spatial dimensions than previous one (e.g. for depth 0 we will have features + with shapes [(N, C, H, W),], for depth 1 - [(N, C, H, W), (N, C, H // 2, W // 2)] and so on). + Default is 5 + encoder_weights: One of **None** (random initialization), **"imagenet"** (pre-training on ImageNet) and + other pretrained weights (see table with available weights for each encoder_name) + decoder_channels: List of integers which specify **in_channels** parameter for convolutions used in decoder. + Length of the list should be the same as **encoder_depth** + decoder_use_batchnorm: If **True**, BatchNorm2d layer between Conv2D and Activation layers + is used. If **"inplace"** InplaceABN will be used, allows to decrease memory consumption. + Available options are **True, False, "inplace"** + decoder_attention_type: Attention module used in decoder of the model. + Available options are **None** and **scse** (https://arxiv.org/abs/1808.08127). + in_channels: A number of input channels for the model, default is 3 (RGB images) + classes: A number of classes for output mask (or you can think as a number of channels of output mask) + activation: An activation function to apply after the final convolution layer. + Available options are **"sigmoid"**, **"softmax"**, **"logsoftmax"**, **"tanh"**, **"identity"**, + **callable** and **None**. + Default is **None** + aux_params: Dictionary with parameters of the auxiliary output (classification head). Auxiliary output is build + on top of encoder if **aux_params** is not **None** (default). Supported params: + - classes (int): A number of classes + - pooling (str): One of "max", "avg". 
Default is "avg" + - dropout (float): Dropout factor in [0, 1) + - activation (str): An activation function to apply "sigmoid"/"softmax" + (could be **None** to return logits) + + Returns: + ``torch.nn.Module``: **Unet++** + + Reference: + https://arxiv.org/abs/1807.10165 + + """ + + def __init__( + self, + encoder_name: str = "resnet34", + encoder_depth: int = 5, + encoder_weights: Optional[str] = "imagenet", + decoder_use_batchnorm: bool = True, + decoder_channels: List[int] = (256, 128, 64, 32, 16), + decoder_attention_type: Optional[str] = None, + in_channels: int = 3, + classes: int = 1, + activation: Optional[Union[str, callable]] = None, + aux_params: Optional[dict] = None, + ): + super().__init__() + + if encoder_name.startswith("mit_b"): + raise ValueError( + "UnetPlusPlus is not support encoder_name={}".format(encoder_name) + ) + + self.encoder = get_encoder( + encoder_name, + in_channels=in_channels, + depth=encoder_depth, + weights=encoder_weights, + ) + + self.decoder = UnetPlusPlusDecoder( + encoder_channels=self.encoder.out_channels, + decoder_channels=decoder_channels, + n_blocks=encoder_depth, + use_batchnorm=decoder_use_batchnorm, + center=True if encoder_name.startswith("vgg") else False, + attention_type=decoder_attention_type, + ) + + self.segmentation_head = SegmentationHead( + in_channels=decoder_channels[-1], + out_channels=classes, + activation=activation, + kernel_size=3, + ) + + if aux_params is not None: + self.classification_head = ClassificationHead( + in_channels=self.encoder.out_channels[-1], **aux_params + ) + else: + self.classification_head = None + + self.name = "unetplusplus-{}".format(encoder_name) + self.initialize() diff --git a/feature_extractor_models/encoders/__init__.py b/feature_extractor_models/encoders/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..c4a4c037537f8bc51a02f377d0c238ba4c3cd6f8 --- /dev/null +++ b/feature_extractor_models/encoders/__init__.py @@ -0,0 +1,126 @@ +import timm +import functools +import torch.utils.model_zoo as model_zoo + +from .resnet import resnet_encoders +from .dpn import dpn_encoders +from .vgg import vgg_encoders +from .senet import senet_encoders +from .densenet import densenet_encoders +from .inceptionresnetv2 import inceptionresnetv2_encoders +from .inceptionv4 import inceptionv4_encoders +from .efficientnet import efficient_net_encoders +from .mobilenet import mobilenet_encoders +from .xception import xception_encoders +from .timm_efficientnet import timm_efficientnet_encoders +from .timm_resnest import timm_resnest_encoders +from .timm_res2net import timm_res2net_encoders +from .timm_regnet import timm_regnet_encoders +from .timm_sknet import timm_sknet_encoders +from .timm_mobilenetv3 import timm_mobilenetv3_encoders +from .timm_gernet import timm_gernet_encoders +from .mix_transformer import mix_transformer_encoders +from .mobileone import mobileone_encoders + +from .timm_universal import TimmUniversalEncoder + +from ._preprocessing import preprocess_input + +encoders = {} +encoders.update(resnet_encoders) +encoders.update(dpn_encoders) +encoders.update(vgg_encoders) +encoders.update(senet_encoders) +encoders.update(densenet_encoders) +encoders.update(inceptionresnetv2_encoders) +encoders.update(inceptionv4_encoders) +encoders.update(efficient_net_encoders) +encoders.update(mobilenet_encoders) +encoders.update(xception_encoders) +encoders.update(timm_efficientnet_encoders) +encoders.update(timm_resnest_encoders) +encoders.update(timm_res2net_encoders) 
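# Usage note (illustrative only, not part of this patch): the registry assembled by
# these `encoders.update(...)` calls is what `get_encoder`, defined further down in
# this file, consults to resolve a backbone by name. Assuming the `resnet34` entry
# from `.resnet` matches upstream segmentation_models_pytorch:
#
#     >>> from feature_extractor_models.encoders import get_encoder
#     >>> encoder = get_encoder("resnet34", in_channels=3, depth=5, weights=None)
#     >>> encoder.out_channels  # consumed by the decoders above as `encoder_channels`
#     (3, 64, 64, 128, 256, 512)
#
# `weights=None` keeps random initialization (nothing is downloaded); a string such
# as "imagenet" selects the matching entry in that encoder's `pretrained_settings`.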
+encoders.update(timm_regnet_encoders) +encoders.update(timm_sknet_encoders) +encoders.update(timm_mobilenetv3_encoders) +encoders.update(timm_gernet_encoders) +encoders.update(mix_transformer_encoders) +encoders.update(mobileone_encoders) + + +def get_encoder(name, in_channels=3, depth=5, weights=None, output_stride=32, **kwargs): + if name.startswith("tu-"): + name = name[3:] + encoder = TimmUniversalEncoder( + name=name, + in_channels=in_channels, + depth=depth, + output_stride=output_stride, + pretrained=weights is not None, + **kwargs, + ) + return encoder + + try: + Encoder = encoders[name]["encoder"] + except KeyError: + raise KeyError( + "Wrong encoder name `{}`, supported encoders: {}".format( + name, list(encoders.keys()) + ) + ) + + params = encoders[name]["params"] + params.update(depth=depth) + encoder = Encoder(**params) + + if weights is not None: + try: + settings = encoders[name]["pretrained_settings"][weights] + except KeyError: + raise KeyError( + "Wrong pretrained weights `{}` for encoder `{}`. Available options are: {}".format( + weights, name, list(encoders[name]["pretrained_settings"].keys()) + ) + ) + encoder.load_state_dict(model_zoo.load_url(settings["url"])) + + encoder.set_in_channels(in_channels, pretrained=weights is not None) + if output_stride != 32: + encoder.make_dilated(output_stride) + + return encoder + + +def get_encoder_names(): + return list(encoders.keys()) + + +def get_preprocessing_params(encoder_name, pretrained="imagenet"): + if encoder_name.startswith("tu-"): + encoder_name = encoder_name[3:] + if not timm.models.is_model_pretrained(encoder_name): + raise ValueError( + f"{encoder_name} does not have pretrained weights and preprocessing parameters" + ) + settings = timm.models.get_pretrained_cfg(encoder_name).__dict__ + else: + all_settings = encoders[encoder_name]["pretrained_settings"] + if pretrained not in all_settings.keys(): + raise ValueError( + "Available pretrained options {}".format(all_settings.keys()) + ) + settings = all_settings[pretrained] + + formatted_settings = {} + formatted_settings["input_space"] = settings.get("input_space", "RGB") + formatted_settings["input_range"] = list(settings.get("input_range", [0, 1])) + formatted_settings["mean"] = list(settings["mean"]) + formatted_settings["std"] = list(settings["std"]) + + return formatted_settings + + +def get_preprocessing_fn(encoder_name, pretrained="imagenet"): + params = get_preprocessing_params(encoder_name, pretrained=pretrained) + return functools.partial(preprocess_input, **params) diff --git a/feature_extractor_models/encoders/_base.py b/feature_extractor_models/encoders/_base.py new file mode 100644 index 0000000000000000000000000000000000000000..3b877075394a30206250ccd26e6c2305130fbf05 --- /dev/null +++ b/feature_extractor_models/encoders/_base.py @@ -0,0 +1,58 @@ +from . 
import _utils as utils + + +class EncoderMixin: + """Add encoder functionality such as: + - output channels specification of feature tensors (produced by encoder) + - patching first convolution for arbitrary input channels + """ + + _output_stride = 32 + + @property + def out_channels(self): + """Return channels dimensions for each tensor of forward output of encoder""" + return self._out_channels[: self._depth + 1] + + @property + def output_stride(self): + return min(self._output_stride, 2**self._depth) + + def set_in_channels(self, in_channels, pretrained=True): + """Change first convolution channels""" + if in_channels == 3: + return + + self._in_channels = in_channels + if self._out_channels[0] == 3: + self._out_channels = tuple([in_channels] + list(self._out_channels)[1:]) + + utils.patch_first_conv( + model=self, new_in_channels=in_channels, pretrained=pretrained + ) + + def get_stages(self): + """Override it in your implementation""" + raise NotImplementedError + + def make_dilated(self, output_stride): + if output_stride == 16: + stage_list = [5] + dilation_list = [2] + + elif output_stride == 8: + stage_list = [4, 5] + dilation_list = [2, 4] + + else: + raise ValueError( + "Output stride should be 16 or 8, got {}.".format(output_stride) + ) + + self._output_stride = output_stride + + stages = self.get_stages() + for stage_indx, dilation_rate in zip(stage_list, dilation_list): + utils.replace_strides_with_dilation( + module=stages[stage_indx], dilation_rate=dilation_rate + ) diff --git a/feature_extractor_models/encoders/_preprocessing.py b/feature_extractor_models/encoders/_preprocessing.py new file mode 100644 index 0000000000000000000000000000000000000000..57c7e6d2d566aca14c963adda7ee318853419d9d --- /dev/null +++ b/feature_extractor_models/encoders/_preprocessing.py @@ -0,0 +1,22 @@ +import numpy as np + + +def preprocess_input( + x, mean=None, std=None, input_space="RGB", input_range=None, **kwargs +): + if input_space == "BGR": + x = x[..., ::-1].copy() + + if input_range is not None: + if x.max() > 1 and input_range[1] == 1: + x = x / 255.0 + + if mean is not None: + mean = np.array(mean) + x = x - mean + + if std is not None: + std = np.array(std) + x = x / std + + return x diff --git a/feature_extractor_models/encoders/_utils.py b/feature_extractor_models/encoders/_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..86128099a550986ca51a3ff263748b4e76a46911 --- /dev/null +++ b/feature_extractor_models/encoders/_utils.py @@ -0,0 +1,57 @@ +import torch +import torch.nn as nn + + +def patch_first_conv(model, new_in_channels, default_in_channels=3, pretrained=True): + """Change first convolution layer input channels. 
+ In case: + in_channels == 1 or in_channels == 2 -> reuse original weights + in_channels > 3 -> make random kaiming normal initialization + """ + + # get first conv + for module in model.modules(): + if isinstance(module, nn.Conv2d) and module.in_channels == default_in_channels: + break + + weight = module.weight.detach() + module.in_channels = new_in_channels + + if not pretrained: + module.weight = nn.parameter.Parameter( + torch.Tensor( + module.out_channels, + new_in_channels // module.groups, + *module.kernel_size, + ) + ) + module.reset_parameters() + + elif new_in_channels == 1: + new_weight = weight.sum(1, keepdim=True) + module.weight = nn.parameter.Parameter(new_weight) + + else: + new_weight = torch.Tensor( + module.out_channels, new_in_channels // module.groups, *module.kernel_size + ) + + for i in range(new_in_channels): + new_weight[:, i] = weight[:, i % default_in_channels] + + new_weight = new_weight * (default_in_channels / new_in_channels) + module.weight = nn.parameter.Parameter(new_weight) + + +def replace_strides_with_dilation(module, dilation_rate): + """Patch Conv2d modules replacing strides with dilation""" + for mod in module.modules(): + if isinstance(mod, nn.Conv2d): + mod.stride = (1, 1) + mod.dilation = (dilation_rate, dilation_rate) + kh, kw = mod.kernel_size + mod.padding = ((kh // 2) * dilation_rate, (kh // 2) * dilation_rate) + + # Kostyl for EfficientNet + if hasattr(mod, "static_padding"): + mod.static_padding = nn.Identity() diff --git a/feature_extractor_models/encoders/densenet.py b/feature_extractor_models/encoders/densenet.py new file mode 100644 index 0000000000000000000000000000000000000000..c4bd0ce22c0f084c87c8ae8e8bc57f1fac726448 --- /dev/null +++ b/feature_extractor_models/encoders/densenet.py @@ -0,0 +1,155 @@ +"""Each encoder should have following attributes and methods and be inherited from `_base.EncoderMixin` + +Attributes: + + _out_channels (list of int): specify number of channels for each encoder feature tensor + _depth (int): specify number of stages in decoder (in other words number of downsampling operations) + _in_channels (int): default number of input channels in first Conv2d layer for encoder (usually 3) + +Methods: + + forward(self, x: torch.Tensor) + produce list of features of different spatial resolutions, each feature is a 4D torch.tensor of + shape NCHW (features should be sorted in descending order according to spatial resolution, starting + with resolution same as input `x` tensor). + + Input: `x` with shape (1, 3, 64, 64) + Output: [f0, f1, f2, f3, f4, f5] - features with corresponding shapes + [(1, 3, 64, 64), (1, 64, 32, 32), (1, 128, 16, 16), (1, 256, 8, 8), + (1, 512, 4, 4), (1, 1024, 2, 2)] (C - dim may differ) + + also should support number of features according to specified depth, e.g. if depth = 5, + number of feature tensors = 6 (one with same resolution as input and 5 downsampled), + depth = 3 -> number of feature tensors = 4 (one with same resolution as input and 3 downsampled). 
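# Illustrative sketch (not part of this patch): a toy encoder that satisfies the
# contract spelled out in the docstring above -- `_out_channels`, `_depth`,
# `_in_channels`, and a `forward` returning one feature map per stage, each half
# the spatial resolution of the previous one. `ToyEncoder` and its channel sizes
# are hypothetical and assume the package and its dependencies are importable;
# the DenseNetEncoder below is a real implementation of the same contract on top
# of torchvision's DenseNet.

import torch
import torch.nn as nn

from feature_extractor_models.encoders._base import EncoderMixin


class ToyEncoder(nn.Module, EncoderMixin):
    def __init__(self, depth=5):
        super().__init__()
        self._depth = depth
        self._in_channels = 3
        self._out_channels = (3, 16, 32, 64, 128, 256)
        # stage 0 keeps the input resolution; every later stage downsamples by 2
        self._stages = nn.ModuleList(
            [nn.Identity()]
            + [
                nn.Sequential(
                    nn.Conv2d(
                        self._out_channels[i],
                        self._out_channels[i + 1],
                        kernel_size=3,
                        stride=2,
                        padding=1,
                    ),
                    nn.ReLU(inplace=True),
                )
                for i in range(depth)
            ]
        )

    def get_stages(self):
        return list(self._stages)

    def forward(self, x):
        features = []
        for stage in self.get_stages()[: self._depth + 1]:
            x = stage(x)
            features.append(x)
        return features


# For a (1, 3, 64, 64) input this yields six feature maps with shapes
# (1, 3, 64, 64), (1, 16, 32, 32), (1, 32, 16, 16), (1, 64, 8, 8),
# (1, 128, 4, 4), (1, 256, 2, 2), matching the shape pattern in the docstring.
print([tuple(f.shape) for f in ToyEncoder()(torch.randn(1, 3, 64, 64))])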
+""" + +import re +import torch.nn as nn + +from pretrainedmodels.models.torchvision_models import pretrained_settings +from torchvision.models.densenet import DenseNet + +from ._base import EncoderMixin + + +class TransitionWithSkip(nn.Module): + def __init__(self, module): + super().__init__() + self.module = module + + def forward(self, x): + for module in self.module: + x = module(x) + if isinstance(module, nn.ReLU): + skip = x + return x, skip + + +class DenseNetEncoder(DenseNet, EncoderMixin): + def __init__(self, out_channels, depth=5, **kwargs): + super().__init__(**kwargs) + self._out_channels = out_channels + self._depth = depth + self._in_channels = 3 + del self.classifier + + def make_dilated(self, *args, **kwargs): + raise ValueError( + "DenseNet encoders do not support dilated mode " + "due to pooling operation for downsampling!" + ) + + def get_stages(self): + return [ + nn.Identity(), + nn.Sequential( + self.features.conv0, self.features.norm0, self.features.relu0 + ), + nn.Sequential( + self.features.pool0, + self.features.denseblock1, + TransitionWithSkip(self.features.transition1), + ), + nn.Sequential( + self.features.denseblock2, TransitionWithSkip(self.features.transition2) + ), + nn.Sequential( + self.features.denseblock3, TransitionWithSkip(self.features.transition3) + ), + nn.Sequential(self.features.denseblock4, self.features.norm5), + ] + + def forward(self, x): + stages = self.get_stages() + + features = [] + for i in range(self._depth + 1): + x = stages[i](x) + if isinstance(x, (list, tuple)): + x, skip = x + features.append(skip) + else: + features.append(x) + + return features + + def load_state_dict(self, state_dict): + pattern = re.compile( + r"^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\.(?:weight|bias|running_mean|running_var))$" + ) + for key in list(state_dict.keys()): + res = pattern.match(key) + if res: + new_key = res.group(1) + res.group(2) + state_dict[new_key] = state_dict[key] + del state_dict[key] + + # remove linear + state_dict.pop("classifier.bias", None) + state_dict.pop("classifier.weight", None) + + super().load_state_dict(state_dict) + + +densenet_encoders = { + "densenet121": { + "encoder": DenseNetEncoder, + "pretrained_settings": pretrained_settings["densenet121"], + "params": { + "out_channels": (3, 64, 256, 512, 1024, 1024), + "num_init_features": 64, + "growth_rate": 32, + "block_config": (6, 12, 24, 16), + }, + }, + "densenet169": { + "encoder": DenseNetEncoder, + "pretrained_settings": pretrained_settings["densenet169"], + "params": { + "out_channels": (3, 64, 256, 512, 1280, 1664), + "num_init_features": 64, + "growth_rate": 32, + "block_config": (6, 12, 32, 32), + }, + }, + "densenet201": { + "encoder": DenseNetEncoder, + "pretrained_settings": pretrained_settings["densenet201"], + "params": { + "out_channels": (3, 64, 256, 512, 1792, 1920), + "num_init_features": 64, + "growth_rate": 32, + "block_config": (6, 12, 48, 32), + }, + }, + "densenet161": { + "encoder": DenseNetEncoder, + "pretrained_settings": pretrained_settings["densenet161"], + "params": { + "out_channels": (3, 96, 384, 768, 2112, 2208), + "num_init_features": 96, + "growth_rate": 48, + "block_config": (6, 12, 36, 24), + }, + }, +} diff --git a/feature_extractor_models/encoders/dpn.py b/feature_extractor_models/encoders/dpn.py new file mode 100644 index 0000000000000000000000000000000000000000..220c66de77985a048f88d0825467d520500ca076 --- /dev/null +++ b/feature_extractor_models/encoders/dpn.py @@ -0,0 +1,173 @@ +"""Each encoder should have following 
attributes and methods and be inherited from `_base.EncoderMixin` + +Attributes: + + _out_channels (list of int): specify number of channels for each encoder feature tensor + _depth (int): specify number of stages in decoder (in other words number of downsampling operations) + _in_channels (int): default number of input channels in first Conv2d layer for encoder (usually 3) + +Methods: + + forward(self, x: torch.Tensor) + produce list of features of different spatial resolutions, each feature is a 4D torch.tensor of + shape NCHW (features should be sorted in descending order according to spatial resolution, starting + with resolution same as input `x` tensor). + + Input: `x` with shape (1, 3, 64, 64) + Output: [f0, f1, f2, f3, f4, f5] - features with corresponding shapes + [(1, 3, 64, 64), (1, 64, 32, 32), (1, 128, 16, 16), (1, 256, 8, 8), + (1, 512, 4, 4), (1, 1024, 2, 2)] (C - dim may differ) + + also should support number of features according to specified depth, e.g. if depth = 5, + number of feature tensors = 6 (one with same resolution as input and 5 downsampled), + depth = 3 -> number of feature tensors = 4 (one with same resolution as input and 3 downsampled). +""" + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from pretrainedmodels.models.dpn import DPN +from pretrainedmodels.models.dpn import pretrained_settings + +from ._base import EncoderMixin + + +class DPNEncoder(DPN, EncoderMixin): + def __init__(self, stage_idxs, out_channels, depth=5, **kwargs): + super().__init__(**kwargs) + self._stage_idxs = stage_idxs + self._depth = depth + self._out_channels = out_channels + self._in_channels = 3 + + del self.last_linear + + def get_stages(self): + return [ + nn.Identity(), + nn.Sequential( + self.features[0].conv, self.features[0].bn, self.features[0].act + ), + nn.Sequential( + self.features[0].pool, self.features[1 : self._stage_idxs[0]] + ), + self.features[self._stage_idxs[0] : self._stage_idxs[1]], + self.features[self._stage_idxs[1] : self._stage_idxs[2]], + self.features[self._stage_idxs[2] : self._stage_idxs[3]], + ] + + def forward(self, x): + stages = self.get_stages() + + features = [] + for i in range(self._depth + 1): + x = stages[i](x) + if isinstance(x, (list, tuple)): + features.append(F.relu(torch.cat(x, dim=1), inplace=True)) + else: + features.append(x) + + return features + + def load_state_dict(self, state_dict, **kwargs): + state_dict.pop("last_linear.bias", None) + state_dict.pop("last_linear.weight", None) + super().load_state_dict(state_dict, **kwargs) + + +dpn_encoders = { + "dpn68": { + "encoder": DPNEncoder, + "pretrained_settings": pretrained_settings["dpn68"], + "params": { + "stage_idxs": (4, 8, 20, 24), + "out_channels": (3, 10, 144, 320, 704, 832), + "groups": 32, + "inc_sec": (16, 32, 32, 64), + "k_r": 128, + "k_sec": (3, 4, 12, 3), + "num_classes": 1000, + "num_init_features": 10, + "small": True, + "test_time_pool": True, + }, + }, + "dpn68b": { + "encoder": DPNEncoder, + "pretrained_settings": pretrained_settings["dpn68b"], + "params": { + "stage_idxs": (4, 8, 20, 24), + "out_channels": (3, 10, 144, 320, 704, 832), + "b": True, + "groups": 32, + "inc_sec": (16, 32, 32, 64), + "k_r": 128, + "k_sec": (3, 4, 12, 3), + "num_classes": 1000, + "num_init_features": 10, + "small": True, + "test_time_pool": True, + }, + }, + "dpn92": { + "encoder": DPNEncoder, + "pretrained_settings": pretrained_settings["dpn92"], + "params": { + "stage_idxs": (4, 8, 28, 32), + "out_channels": (3, 64, 336, 704, 1552, 2688), + "groups": 32, + 
"inc_sec": (16, 32, 24, 128), + "k_r": 96, + "k_sec": (3, 4, 20, 3), + "num_classes": 1000, + "num_init_features": 64, + "test_time_pool": True, + }, + }, + "dpn98": { + "encoder": DPNEncoder, + "pretrained_settings": pretrained_settings["dpn98"], + "params": { + "stage_idxs": (4, 10, 30, 34), + "out_channels": (3, 96, 336, 768, 1728, 2688), + "groups": 40, + "inc_sec": (16, 32, 32, 128), + "k_r": 160, + "k_sec": (3, 6, 20, 3), + "num_classes": 1000, + "num_init_features": 96, + "test_time_pool": True, + }, + }, + "dpn107": { + "encoder": DPNEncoder, + "pretrained_settings": pretrained_settings["dpn107"], + "params": { + "stage_idxs": (5, 13, 33, 37), + "out_channels": (3, 128, 376, 1152, 2432, 2688), + "groups": 50, + "inc_sec": (20, 64, 64, 128), + "k_r": 200, + "k_sec": (4, 8, 20, 3), + "num_classes": 1000, + "num_init_features": 128, + "test_time_pool": True, + }, + }, + "dpn131": { + "encoder": DPNEncoder, + "pretrained_settings": pretrained_settings["dpn131"], + "params": { + "stage_idxs": (5, 13, 41, 45), + "out_channels": (3, 128, 352, 832, 1984, 2688), + "groups": 40, + "inc_sec": (16, 32, 32, 128), + "k_r": 160, + "k_sec": (4, 8, 28, 3), + "num_classes": 1000, + "num_init_features": 128, + "test_time_pool": True, + }, + }, +} diff --git a/feature_extractor_models/encoders/efficientnet.py b/feature_extractor_models/encoders/efficientnet.py new file mode 100644 index 0000000000000000000000000000000000000000..4a7af6b4498ba910f56daf471fb8448d137f1ffd --- /dev/null +++ b/feature_extractor_models/encoders/efficientnet.py @@ -0,0 +1,177 @@ +"""Each encoder should have following attributes and methods and be inherited from `_base.EncoderMixin` + +Attributes: + + _out_channels (list of int): specify number of channels for each encoder feature tensor + _depth (int): specify number of stages in decoder (in other words number of downsampling operations) + _in_channels (int): default number of input channels in first Conv2d layer for encoder (usually 3) + +Methods: + + forward(self, x: torch.Tensor) + produce list of features of different spatial resolutions, each feature is a 4D torch.tensor of + shape NCHW (features should be sorted in descending order according to spatial resolution, starting + with resolution same as input `x` tensor). + + Input: `x` with shape (1, 3, 64, 64) + Output: [f0, f1, f2, f3, f4, f5] - features with corresponding shapes + [(1, 3, 64, 64), (1, 64, 32, 32), (1, 128, 16, 16), (1, 256, 8, 8), + (1, 512, 4, 4), (1, 1024, 2, 2)] (C - dim may differ) + + also should support number of features according to specified depth, e.g. if depth = 5, + number of feature tensors = 6 (one with same resolution as input and 5 downsampled), + depth = 3 -> number of feature tensors = 4 (one with same resolution as input and 3 downsampled). 
+""" + +import torch.nn as nn +from efficientnet_pytorch import EfficientNet +from efficientnet_pytorch.utils import url_map, url_map_advprop, get_model_params + +from ._base import EncoderMixin + + +class EfficientNetEncoder(EfficientNet, EncoderMixin): + def __init__(self, stage_idxs, out_channels, model_name, depth=5): + blocks_args, global_params = get_model_params(model_name, override_params=None) + super().__init__(blocks_args, global_params) + + self._stage_idxs = stage_idxs + self._out_channels = out_channels + self._depth = depth + self._in_channels = 3 + + del self._fc + + def get_stages(self): + return [ + nn.Identity(), + nn.Sequential(self._conv_stem, self._bn0, self._swish), + self._blocks[: self._stage_idxs[0]], + self._blocks[self._stage_idxs[0] : self._stage_idxs[1]], + self._blocks[self._stage_idxs[1] : self._stage_idxs[2]], + self._blocks[self._stage_idxs[2] :], + ] + + def forward(self, x): + stages = self.get_stages() + + block_number = 0.0 + drop_connect_rate = self._global_params.drop_connect_rate + + features = [] + for i in range(self._depth + 1): + # Identity and Sequential stages + if i < 2: + x = stages[i](x) + + # Block stages need drop_connect rate + else: + for module in stages[i]: + drop_connect = drop_connect_rate * block_number / len(self._blocks) + block_number += 1.0 + x = module(x, drop_connect) + + features.append(x) + + return features + + def load_state_dict(self, state_dict, **kwargs): + state_dict.pop("_fc.bias", None) + state_dict.pop("_fc.weight", None) + super().load_state_dict(state_dict, **kwargs) + + +def _get_pretrained_settings(encoder): + pretrained_settings = { + "imagenet": { + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "url": url_map[encoder], + "input_space": "RGB", + "input_range": [0, 1], + }, + "advprop": { + "mean": [0.5, 0.5, 0.5], + "std": [0.5, 0.5, 0.5], + "url": url_map_advprop[encoder], + "input_space": "RGB", + "input_range": [0, 1], + }, + } + return pretrained_settings + + +efficient_net_encoders = { + "efficientnet-b0": { + "encoder": EfficientNetEncoder, + "pretrained_settings": _get_pretrained_settings("efficientnet-b0"), + "params": { + "out_channels": (3, 32, 24, 40, 112, 320), + "stage_idxs": (3, 5, 9, 16), + "model_name": "efficientnet-b0", + }, + }, + "efficientnet-b1": { + "encoder": EfficientNetEncoder, + "pretrained_settings": _get_pretrained_settings("efficientnet-b1"), + "params": { + "out_channels": (3, 32, 24, 40, 112, 320), + "stage_idxs": (5, 8, 16, 23), + "model_name": "efficientnet-b1", + }, + }, + "efficientnet-b2": { + "encoder": EfficientNetEncoder, + "pretrained_settings": _get_pretrained_settings("efficientnet-b2"), + "params": { + "out_channels": (3, 32, 24, 48, 120, 352), + "stage_idxs": (5, 8, 16, 23), + "model_name": "efficientnet-b2", + }, + }, + "efficientnet-b3": { + "encoder": EfficientNetEncoder, + "pretrained_settings": _get_pretrained_settings("efficientnet-b3"), + "params": { + "out_channels": (3, 40, 32, 48, 136, 384), + "stage_idxs": (5, 8, 18, 26), + "model_name": "efficientnet-b3", + }, + }, + "efficientnet-b4": { + "encoder": EfficientNetEncoder, + "pretrained_settings": _get_pretrained_settings("efficientnet-b4"), + "params": { + "out_channels": (3, 48, 32, 56, 160, 448), + "stage_idxs": (6, 10, 22, 32), + "model_name": "efficientnet-b4", + }, + }, + "efficientnet-b5": { + "encoder": EfficientNetEncoder, + "pretrained_settings": _get_pretrained_settings("efficientnet-b5"), + "params": { + "out_channels": (3, 48, 40, 64, 176, 512), + "stage_idxs": (8, 13, 27, 
39), + "model_name": "efficientnet-b5", + }, + }, + "efficientnet-b6": { + "encoder": EfficientNetEncoder, + "pretrained_settings": _get_pretrained_settings("efficientnet-b6"), + "params": { + "out_channels": (3, 56, 40, 72, 200, 576), + "stage_idxs": (9, 15, 31, 45), + "model_name": "efficientnet-b6", + }, + }, + "efficientnet-b7": { + "encoder": EfficientNetEncoder, + "pretrained_settings": _get_pretrained_settings("efficientnet-b7"), + "params": { + "out_channels": (3, 64, 48, 80, 224, 640), + "stage_idxs": (11, 18, 38, 55), + "model_name": "efficientnet-b7", + }, + }, +} diff --git a/feature_extractor_models/encoders/inceptionresnetv2.py b/feature_extractor_models/encoders/inceptionresnetv2.py new file mode 100644 index 0000000000000000000000000000000000000000..5d90c7f475166ac17e2826dd4b093eceea0d9097 --- /dev/null +++ b/feature_extractor_models/encoders/inceptionresnetv2.py @@ -0,0 +1,91 @@ +"""Each encoder should have following attributes and methods and be inherited from `_base.EncoderMixin` + +Attributes: + + _out_channels (list of int): specify number of channels for each encoder feature tensor + _depth (int): specify number of stages in decoder (in other words number of downsampling operations) + _in_channels (int): default number of input channels in first Conv2d layer for encoder (usually 3) + +Methods: + + forward(self, x: torch.Tensor) + produce list of features of different spatial resolutions, each feature is a 4D torch.tensor of + shape NCHW (features should be sorted in descending order according to spatial resolution, starting + with resolution same as input `x` tensor). + + Input: `x` with shape (1, 3, 64, 64) + Output: [f0, f1, f2, f3, f4, f5] - features with corresponding shapes + [(1, 3, 64, 64), (1, 64, 32, 32), (1, 128, 16, 16), (1, 256, 8, 8), + (1, 512, 4, 4), (1, 1024, 2, 2)] (C - dim may differ) + + also should support number of features according to specified depth, e.g. if depth = 5, + number of feature tensors = 6 (one with same resolution as input and 5 downsampled), + depth = 3 -> number of feature tensors = 4 (one with same resolution as input and 3 downsampled). +""" + +import torch.nn as nn +from pretrainedmodels.models.inceptionresnetv2 import InceptionResNetV2 +from pretrainedmodels.models.inceptionresnetv2 import pretrained_settings + +from ._base import EncoderMixin + + +class InceptionResNetV2Encoder(InceptionResNetV2, EncoderMixin): + def __init__(self, out_channels, depth=5, **kwargs): + super().__init__(**kwargs) + + self._out_channels = out_channels + self._depth = depth + self._in_channels = 3 + + # correct paddings + for m in self.modules(): + if isinstance(m, nn.Conv2d): + if m.kernel_size == (3, 3): + m.padding = (1, 1) + if isinstance(m, nn.MaxPool2d): + m.padding = (1, 1) + + # remove linear layers + del self.avgpool_1a + del self.last_linear + + def make_dilated(self, *args, **kwargs): + raise ValueError( + "InceptionResnetV2 encoder does not support dilated mode " + "due to pooling operation for downsampling!" 
+ ) + + def get_stages(self): + return [ + nn.Identity(), + nn.Sequential(self.conv2d_1a, self.conv2d_2a, self.conv2d_2b), + nn.Sequential(self.maxpool_3a, self.conv2d_3b, self.conv2d_4a), + nn.Sequential(self.maxpool_5a, self.mixed_5b, self.repeat), + nn.Sequential(self.mixed_6a, self.repeat_1), + nn.Sequential(self.mixed_7a, self.repeat_2, self.block8, self.conv2d_7b), + ] + + def forward(self, x): + stages = self.get_stages() + + features = [] + for i in range(self._depth + 1): + x = stages[i](x) + features.append(x) + + return features + + def load_state_dict(self, state_dict, **kwargs): + state_dict.pop("last_linear.bias", None) + state_dict.pop("last_linear.weight", None) + super().load_state_dict(state_dict, **kwargs) + + +inceptionresnetv2_encoders = { + "inceptionresnetv2": { + "encoder": InceptionResNetV2Encoder, + "pretrained_settings": pretrained_settings["inceptionresnetv2"], + "params": {"out_channels": (3, 64, 192, 320, 1088, 1536), "num_classes": 1000}, + } +} diff --git a/feature_extractor_models/encoders/inceptionv4.py b/feature_extractor_models/encoders/inceptionv4.py new file mode 100644 index 0000000000000000000000000000000000000000..83adf00365379c322fbf0d0a177b78aea21ed65e --- /dev/null +++ b/feature_extractor_models/encoders/inceptionv4.py @@ -0,0 +1,94 @@ +"""Each encoder should have following attributes and methods and be inherited from `_base.EncoderMixin` + +Attributes: + + _out_channels (list of int): specify number of channels for each encoder feature tensor + _depth (int): specify number of stages in decoder (in other words number of downsampling operations) + _in_channels (int): default number of input channels in first Conv2d layer for encoder (usually 3) + +Methods: + + forward(self, x: torch.Tensor) + produce list of features of different spatial resolutions, each feature is a 4D torch.tensor of + shape NCHW (features should be sorted in descending order according to spatial resolution, starting + with resolution same as input `x` tensor). + + Input: `x` with shape (1, 3, 64, 64) + Output: [f0, f1, f2, f3, f4, f5] - features with corresponding shapes + [(1, 3, 64, 64), (1, 64, 32, 32), (1, 128, 16, 16), (1, 256, 8, 8), + (1, 512, 4, 4), (1, 1024, 2, 2)] (C - dim may differ) + + also should support number of features according to specified depth, e.g. if depth = 5, + number of feature tensors = 6 (one with same resolution as input and 5 downsampled), + depth = 3 -> number of feature tensors = 4 (one with same resolution as input and 3 downsampled). +""" + +import torch.nn as nn +from pretrainedmodels.models.inceptionv4 import InceptionV4 +from pretrainedmodels.models.inceptionv4 import pretrained_settings + +from ._base import EncoderMixin + + +class InceptionV4Encoder(InceptionV4, EncoderMixin): + def __init__(self, stage_idxs, out_channels, depth=5, **kwargs): + super().__init__(**kwargs) + self._stage_idxs = stage_idxs + self._out_channels = out_channels + self._depth = depth + self._in_channels = 3 + + # correct paddings + for m in self.modules(): + if isinstance(m, nn.Conv2d): + if m.kernel_size == (3, 3): + m.padding = (1, 1) + if isinstance(m, nn.MaxPool2d): + m.padding = (1, 1) + + # remove linear layers + del self.last_linear + + def make_dilated(self, stage_list, dilation_list): + raise ValueError( + "InceptionV4 encoder does not support dilated mode " + "due to pooling operation for downsampling!" 
+ ) + + def get_stages(self): + return [ + nn.Identity(), + self.features[: self._stage_idxs[0]], + self.features[self._stage_idxs[0] : self._stage_idxs[1]], + self.features[self._stage_idxs[1] : self._stage_idxs[2]], + self.features[self._stage_idxs[2] : self._stage_idxs[3]], + self.features[self._stage_idxs[3] :], + ] + + def forward(self, x): + stages = self.get_stages() + + features = [] + for i in range(self._depth + 1): + x = stages[i](x) + features.append(x) + + return features + + def load_state_dict(self, state_dict, **kwargs): + state_dict.pop("last_linear.bias", None) + state_dict.pop("last_linear.weight", None) + super().load_state_dict(state_dict, **kwargs) + + +inceptionv4_encoders = { + "inceptionv4": { + "encoder": InceptionV4Encoder, + "pretrained_settings": pretrained_settings["inceptionv4"], + "params": { + "stage_idxs": (3, 5, 9, 15), + "out_channels": (3, 64, 192, 384, 1024, 1536), + "num_classes": 1001, + }, + } +} diff --git a/feature_extractor_models/encoders/mix_transformer.py b/feature_extractor_models/encoders/mix_transformer.py new file mode 100644 index 0000000000000000000000000000000000000000..1c7091256b76fbfe00b4cfe731be7e573b3fe078 --- /dev/null +++ b/feature_extractor_models/encoders/mix_transformer.py @@ -0,0 +1,663 @@ +# --------------------------------------------------------------- +# Copyright (c) 2021, NVIDIA Corporation. All rights reserved. +# +# This work is licensed under the NVIDIA Source Code License +# --------------------------------------------------------------- +import math +import torch +import torch.nn as nn +from functools import partial + +from timm.layers import DropPath, to_2tuple, trunc_normal_ + + +class Mlp(nn.Module): + def __init__( + self, + in_features, + hidden_features=None, + out_features=None, + act_layer=nn.GELU, + drop=0.0, + ): + super().__init__() + out_features = out_features or in_features + hidden_features = hidden_features or in_features + self.fc1 = nn.Linear(in_features, hidden_features) + self.dwconv = DWConv(hidden_features) + self.act = act_layer() + self.fc2 = nn.Linear(hidden_features, out_features) + self.drop = nn.Dropout(drop) + + self.apply(self._init_weights) + + def _init_weights(self, m): + if isinstance(m, nn.Linear): + trunc_normal_(m.weight, std=0.02) + if isinstance(m, nn.Linear) and m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.LayerNorm): + nn.init.constant_(m.bias, 0) + nn.init.constant_(m.weight, 1.0) + elif isinstance(m, nn.Conv2d): + fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + fan_out //= m.groups + m.weight.data.normal_(0, math.sqrt(2.0 / fan_out)) + if m.bias is not None: + m.bias.data.zero_() + + def forward(self, x, H, W): + x = self.fc1(x) + x = self.dwconv(x, H, W) + x = self.act(x) + x = self.drop(x) + x = self.fc2(x) + x = self.drop(x) + return x + + +class Attention(nn.Module): + def __init__( + self, + dim, + num_heads=8, + qkv_bias=False, + qk_scale=None, + attn_drop=0.0, + proj_drop=0.0, + sr_ratio=1, + ): + super().__init__() + assert ( + dim % num_heads == 0 + ), f"dim {dim} should be divided by num_heads {num_heads}." 
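+        # Spatial-reduction attention (SegFormer): when sr_ratio > 1, the keys
+        # and values are computed from a spatially downsampled copy of the
+        # tokens (see `self.sr` / `self.norm` below), cutting attention cost
+        # from O(N^2) to roughly O(N^2 / sr_ratio^2).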
+ + self.dim = dim + self.num_heads = num_heads + head_dim = dim // num_heads + self.scale = qk_scale or head_dim**-0.5 + + self.q = nn.Linear(dim, dim, bias=qkv_bias) + self.kv = nn.Linear(dim, dim * 2, bias=qkv_bias) + self.attn_drop = nn.Dropout(attn_drop) + self.proj = nn.Linear(dim, dim) + self.proj_drop = nn.Dropout(proj_drop) + + self.sr_ratio = sr_ratio + if sr_ratio > 1: + self.sr = nn.Conv2d(dim, dim, kernel_size=sr_ratio, stride=sr_ratio) + self.norm = nn.LayerNorm(dim) + + self.apply(self._init_weights) + + def _init_weights(self, m): + if isinstance(m, nn.Linear): + trunc_normal_(m.weight, std=0.02) + if isinstance(m, nn.Linear) and m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.LayerNorm): + nn.init.constant_(m.bias, 0) + nn.init.constant_(m.weight, 1.0) + elif isinstance(m, nn.Conv2d): + fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + fan_out //= m.groups + m.weight.data.normal_(0, math.sqrt(2.0 / fan_out)) + if m.bias is not None: + m.bias.data.zero_() + + def forward(self, x, H, W): + B, N, C = x.shape + q = ( + self.q(x) + .reshape(B, N, self.num_heads, C // self.num_heads) + .permute(0, 2, 1, 3) + ) + + if self.sr_ratio > 1: + x_ = x.permute(0, 2, 1).reshape(B, C, H, W) + x_ = self.sr(x_).reshape(B, C, -1).permute(0, 2, 1) + x_ = self.norm(x_) + kv = ( + self.kv(x_) + .reshape(B, -1, 2, self.num_heads, C // self.num_heads) + .permute(2, 0, 3, 1, 4) + ) + else: + kv = ( + self.kv(x) + .reshape(B, -1, 2, self.num_heads, C // self.num_heads) + .permute(2, 0, 3, 1, 4) + ) + k, v = kv[0], kv[1] + + attn = (q @ k.transpose(-2, -1)) * self.scale + attn = attn.softmax(dim=-1) + attn = self.attn_drop(attn) + + x = (attn @ v).transpose(1, 2).reshape(B, N, C) + x = self.proj(x) + x = self.proj_drop(x) + + return x + + +class Block(nn.Module): + def __init__( + self, + dim, + num_heads, + mlp_ratio=4.0, + qkv_bias=False, + qk_scale=None, + drop=0.0, + attn_drop=0.0, + drop_path=0.0, + act_layer=nn.GELU, + norm_layer=nn.LayerNorm, + sr_ratio=1, + ): + super().__init__() + self.norm1 = norm_layer(dim) + self.attn = Attention( + dim, + num_heads=num_heads, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + attn_drop=attn_drop, + proj_drop=drop, + sr_ratio=sr_ratio, + ) + # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here + self.drop_path = DropPath(drop_path) if drop_path > 0.0 else nn.Identity() + self.norm2 = norm_layer(dim) + mlp_hidden_dim = int(dim * mlp_ratio) + self.mlp = Mlp( + in_features=dim, + hidden_features=mlp_hidden_dim, + act_layer=act_layer, + drop=drop, + ) + + self.apply(self._init_weights) + + def _init_weights(self, m): + if isinstance(m, nn.Linear): + trunc_normal_(m.weight, std=0.02) + if isinstance(m, nn.Linear) and m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.LayerNorm): + nn.init.constant_(m.bias, 0) + nn.init.constant_(m.weight, 1.0) + elif isinstance(m, nn.Conv2d): + fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + fan_out //= m.groups + m.weight.data.normal_(0, math.sqrt(2.0 / fan_out)) + if m.bias is not None: + m.bias.data.zero_() + + def forward(self, x, H, W): + x = x + self.drop_path(self.attn(self.norm1(x), H, W)) + x = x + self.drop_path(self.mlp(self.norm2(x), H, W)) + + return x + + +class OverlapPatchEmbed(nn.Module): + """Image to Patch Embedding""" + + def __init__(self, img_size=224, patch_size=7, stride=4, in_chans=3, embed_dim=768): + super().__init__() + img_size = to_2tuple(img_size) + patch_size = to_2tuple(patch_size) 
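+        # "Overlapping" refers to stride < patch_size (e.g. 7x7 patches with
+        # stride 4): neighbouring patches share pixels, unlike the
+        # non-overlapping patchify of the original ViT (stride == patch_size).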
+ + self.img_size = img_size + self.patch_size = patch_size + self.H, self.W = img_size[0] // patch_size[0], img_size[1] // patch_size[1] + self.num_patches = self.H * self.W + self.proj = nn.Conv2d( + in_chans, + embed_dim, + kernel_size=patch_size, + stride=stride, + padding=(patch_size[0] // 2, patch_size[1] // 2), + ) + self.norm = nn.LayerNorm(embed_dim) + + self.apply(self._init_weights) + + def _init_weights(self, m): + if isinstance(m, nn.Linear): + trunc_normal_(m.weight, std=0.02) + if isinstance(m, nn.Linear) and m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.LayerNorm): + nn.init.constant_(m.bias, 0) + nn.init.constant_(m.weight, 1.0) + elif isinstance(m, nn.Conv2d): + fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + fan_out //= m.groups + m.weight.data.normal_(0, math.sqrt(2.0 / fan_out)) + if m.bias is not None: + m.bias.data.zero_() + + def forward(self, x): + x = self.proj(x) + _, _, H, W = x.shape + x = x.flatten(2).transpose(1, 2) + x = self.norm(x) + + return x, H, W + + +class MixVisionTransformer(nn.Module): + def __init__( + self, + img_size=224, + patch_size=16, + in_chans=3, + num_classes=1000, + embed_dims=[64, 128, 256, 512], + num_heads=[1, 2, 4, 8], + mlp_ratios=[4, 4, 4, 4], + qkv_bias=False, + qk_scale=None, + drop_rate=0.0, + attn_drop_rate=0.0, + drop_path_rate=0.0, + norm_layer=nn.LayerNorm, + depths=[3, 4, 6, 3], + sr_ratios=[8, 4, 2, 1], + ): + super().__init__() + self.num_classes = num_classes + self.depths = depths + + # patch_embed + self.patch_embed1 = OverlapPatchEmbed( + img_size=img_size, + patch_size=7, + stride=4, + in_chans=in_chans, + embed_dim=embed_dims[0], + ) + self.patch_embed2 = OverlapPatchEmbed( + img_size=img_size // 4, + patch_size=3, + stride=2, + in_chans=embed_dims[0], + embed_dim=embed_dims[1], + ) + self.patch_embed3 = OverlapPatchEmbed( + img_size=img_size // 8, + patch_size=3, + stride=2, + in_chans=embed_dims[1], + embed_dim=embed_dims[2], + ) + self.patch_embed4 = OverlapPatchEmbed( + img_size=img_size // 16, + patch_size=3, + stride=2, + in_chans=embed_dims[2], + embed_dim=embed_dims[3], + ) + + # transformer encoder + dpr = [ + x.item() for x in torch.linspace(0, drop_path_rate, sum(depths)) + ] # stochastic depth decay rule + cur = 0 + self.block1 = nn.ModuleList( + [ + Block( + dim=embed_dims[0], + num_heads=num_heads[0], + mlp_ratio=mlp_ratios[0], + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop_rate, + attn_drop=attn_drop_rate, + drop_path=dpr[cur + i], + norm_layer=norm_layer, + sr_ratio=sr_ratios[0], + ) + for i in range(depths[0]) + ] + ) + self.norm1 = norm_layer(embed_dims[0]) + + cur += depths[0] + self.block2 = nn.ModuleList( + [ + Block( + dim=embed_dims[1], + num_heads=num_heads[1], + mlp_ratio=mlp_ratios[1], + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop_rate, + attn_drop=attn_drop_rate, + drop_path=dpr[cur + i], + norm_layer=norm_layer, + sr_ratio=sr_ratios[1], + ) + for i in range(depths[1]) + ] + ) + self.norm2 = norm_layer(embed_dims[1]) + + cur += depths[1] + self.block3 = nn.ModuleList( + [ + Block( + dim=embed_dims[2], + num_heads=num_heads[2], + mlp_ratio=mlp_ratios[2], + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop_rate, + attn_drop=attn_drop_rate, + drop_path=dpr[cur + i], + norm_layer=norm_layer, + sr_ratio=sr_ratios[2], + ) + for i in range(depths[2]) + ] + ) + self.norm3 = norm_layer(embed_dims[2]) + + cur += depths[2] + self.block4 = nn.ModuleList( + [ + Block( + dim=embed_dims[3], + num_heads=num_heads[3], + mlp_ratio=mlp_ratios[3], 
+ qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop_rate, + attn_drop=attn_drop_rate, + drop_path=dpr[cur + i], + norm_layer=norm_layer, + sr_ratio=sr_ratios[3], + ) + for i in range(depths[3]) + ] + ) + self.norm4 = norm_layer(embed_dims[3]) + + # classification head + # self.head = nn.Linear(embed_dims[3], num_classes) if num_classes > 0 else nn.Identity() + + self.apply(self._init_weights) + + def _init_weights(self, m): + if isinstance(m, nn.Linear): + trunc_normal_(m.weight, std=0.02) + if isinstance(m, nn.Linear) and m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.LayerNorm): + nn.init.constant_(m.bias, 0) + nn.init.constant_(m.weight, 1.0) + elif isinstance(m, nn.Conv2d): + fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + fan_out //= m.groups + m.weight.data.normal_(0, math.sqrt(2.0 / fan_out)) + if m.bias is not None: + m.bias.data.zero_() + + def init_weights(self, pretrained=None): + pass + + def reset_drop_path(self, drop_path_rate): + dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(self.depths))] + cur = 0 + for i in range(self.depths[0]): + self.block1[i].drop_path.drop_prob = dpr[cur + i] + + cur += self.depths[0] + for i in range(self.depths[1]): + self.block2[i].drop_path.drop_prob = dpr[cur + i] + + cur += self.depths[1] + for i in range(self.depths[2]): + self.block3[i].drop_path.drop_prob = dpr[cur + i] + + cur += self.depths[2] + for i in range(self.depths[3]): + self.block4[i].drop_path.drop_prob = dpr[cur + i] + + def freeze_patch_emb(self): + self.patch_embed1.requires_grad = False + + @torch.jit.ignore + def no_weight_decay(self): + return { + "pos_embed1", + "pos_embed2", + "pos_embed3", + "pos_embed4", + "cls_token", + } # has pos_embed may be better + + def get_classifier(self): + return self.head + + def reset_classifier(self, num_classes, global_pool=""): + self.num_classes = num_classes + self.head = ( + nn.Linear(self.embed_dim, num_classes) if num_classes > 0 else nn.Identity() + ) + + def forward_features(self, x): + B = x.shape[0] + outs = [] + + # stage 1 + x, H, W = self.patch_embed1(x) + for i, blk in enumerate(self.block1): + x = blk(x, H, W) + x = self.norm1(x) + x = x.reshape(B, H, W, -1).permute(0, 3, 1, 2).contiguous() + outs.append(x) + + # stage 2 + x, H, W = self.patch_embed2(x) + for i, blk in enumerate(self.block2): + x = blk(x, H, W) + x = self.norm2(x) + x = x.reshape(B, H, W, -1).permute(0, 3, 1, 2).contiguous() + outs.append(x) + + # stage 3 + x, H, W = self.patch_embed3(x) + for i, blk in enumerate(self.block3): + x = blk(x, H, W) + x = self.norm3(x) + x = x.reshape(B, H, W, -1).permute(0, 3, 1, 2).contiguous() + outs.append(x) + + # stage 4 + x, H, W = self.patch_embed4(x) + for i, blk in enumerate(self.block4): + x = blk(x, H, W) + x = self.norm4(x) + x = x.reshape(B, H, W, -1).permute(0, 3, 1, 2).contiguous() + outs.append(x) + + return outs + + def forward(self, x): + x = self.forward_features(x) + # x = self.head(x) + + return x + + +class DWConv(nn.Module): + def __init__(self, dim=768): + super(DWConv, self).__init__() + self.dwconv = nn.Conv2d(dim, dim, 3, 1, 1, bias=True, groups=dim) + + def forward(self, x, H, W): + B, N, C = x.shape + x = x.transpose(1, 2).view(B, C, H, W) + x = self.dwconv(x) + x = x.flatten(2).transpose(1, 2) + + return x + + +# --------------------------------------------------------------- +# End of NVIDIA code +# --------------------------------------------------------------- + +from ._base import EncoderMixin # noqa E402 + + +class 
MixVisionTransformerEncoder(MixVisionTransformer, EncoderMixin): + def __init__(self, out_channels, depth=5, **kwargs): + super().__init__(**kwargs) + self._out_channels = out_channels + self._depth = depth + self._in_channels = 3 + + def make_dilated(self, *args, **kwargs): + raise ValueError("MixVisionTransformer encoder does not support dilated mode") + + def set_in_channels(self, in_channels, *args, **kwargs): + if in_channels != 3: + raise ValueError( + "MixVisionTransformer encoder does not support in_channels setting other than 3" + ) + + def forward(self, x): + # create dummy output for the first block + B, C, H, W = x.shape + dummy = torch.empty([B, 0, H // 2, W // 2], dtype=x.dtype, device=x.device) + + return [x, dummy] + self.forward_features(x)[: self._depth - 1] + + def load_state_dict(self, state_dict): + state_dict.pop("head.weight", None) + state_dict.pop("head.bias", None) + return super().load_state_dict(state_dict) + + +def get_pretrained_cfg(name): + return { + "url": "https://github.com/qubvel/segmentation_models.pytorch/releases/download/v0.0.2/{}.pth".format( + name + ), + "input_space": "RGB", + "input_size": [3, 224, 224], + "input_range": [0, 1], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + } + + +mix_transformer_encoders = { + "mit_b0": { + "encoder": MixVisionTransformerEncoder, + "pretrained_settings": {"imagenet": get_pretrained_cfg("mit_b0")}, + "params": dict( + out_channels=(3, 0, 32, 64, 160, 256), + patch_size=4, + embed_dims=[32, 64, 160, 256], + num_heads=[1, 2, 5, 8], + mlp_ratios=[4, 4, 4, 4], + qkv_bias=True, + norm_layer=partial(nn.LayerNorm, eps=1e-6), + depths=[2, 2, 2, 2], + sr_ratios=[8, 4, 2, 1], + drop_rate=0.0, + drop_path_rate=0.1, + ), + }, + "mit_b1": { + "encoder": MixVisionTransformerEncoder, + "pretrained_settings": {"imagenet": get_pretrained_cfg("mit_b1")}, + "params": dict( + out_channels=(3, 0, 64, 128, 320, 512), + patch_size=4, + embed_dims=[64, 128, 320, 512], + num_heads=[1, 2, 5, 8], + mlp_ratios=[4, 4, 4, 4], + qkv_bias=True, + norm_layer=partial(nn.LayerNorm, eps=1e-6), + depths=[2, 2, 2, 2], + sr_ratios=[8, 4, 2, 1], + drop_rate=0.0, + drop_path_rate=0.1, + ), + }, + "mit_b2": { + "encoder": MixVisionTransformerEncoder, + "pretrained_settings": {"imagenet": get_pretrained_cfg("mit_b2")}, + "params": dict( + out_channels=(3, 0, 64, 128, 320, 512), + patch_size=4, + embed_dims=[64, 128, 320, 512], + num_heads=[1, 2, 5, 8], + mlp_ratios=[4, 4, 4, 4], + qkv_bias=True, + norm_layer=partial(nn.LayerNorm, eps=1e-6), + depths=[3, 4, 6, 3], + sr_ratios=[8, 4, 2, 1], + drop_rate=0.0, + drop_path_rate=0.1, + ), + }, + "mit_b3": { + "encoder": MixVisionTransformerEncoder, + "pretrained_settings": {"imagenet": get_pretrained_cfg("mit_b3")}, + "params": dict( + out_channels=(3, 0, 64, 128, 320, 512), + patch_size=4, + embed_dims=[64, 128, 320, 512], + num_heads=[1, 2, 5, 8], + mlp_ratios=[4, 4, 4, 4], + qkv_bias=True, + norm_layer=partial(nn.LayerNorm, eps=1e-6), + depths=[3, 4, 18, 3], + sr_ratios=[8, 4, 2, 1], + drop_rate=0.0, + drop_path_rate=0.1, + ), + }, + "mit_b4": { + "encoder": MixVisionTransformerEncoder, + "pretrained_settings": {"imagenet": get_pretrained_cfg("mit_b4")}, + "params": dict( + out_channels=(3, 0, 64, 128, 320, 512), + patch_size=4, + embed_dims=[64, 128, 320, 512], + num_heads=[1, 2, 5, 8], + mlp_ratios=[4, 4, 4, 4], + qkv_bias=True, + norm_layer=partial(nn.LayerNorm, eps=1e-6), + depths=[3, 8, 27, 3], + sr_ratios=[8, 4, 2, 1], + drop_rate=0.0, + drop_path_rate=0.1, + ), + }, + "mit_b5": { 
+ "encoder": MixVisionTransformerEncoder, + "pretrained_settings": {"imagenet": get_pretrained_cfg("mit_b5")}, + "params": dict( + out_channels=(3, 0, 64, 128, 320, 512), + patch_size=4, + embed_dims=[64, 128, 320, 512], + num_heads=[1, 2, 5, 8], + mlp_ratios=[4, 4, 4, 4], + qkv_bias=True, + norm_layer=partial(nn.LayerNorm, eps=1e-6), + depths=[3, 6, 40, 3], + sr_ratios=[8, 4, 2, 1], + drop_rate=0.0, + drop_path_rate=0.1, + ), + }, +} diff --git a/feature_extractor_models/encoders/mobilenet.py b/feature_extractor_models/encoders/mobilenet.py new file mode 100644 index 0000000000000000000000000000000000000000..dd30f142a822f8df1e5aeb2671a99e7509bc763e --- /dev/null +++ b/feature_extractor_models/encoders/mobilenet.py @@ -0,0 +1,80 @@ +"""Each encoder should have following attributes and methods and be inherited from `_base.EncoderMixin` + +Attributes: + + _out_channels (list of int): specify number of channels for each encoder feature tensor + _depth (int): specify number of stages in decoder (in other words number of downsampling operations) + _in_channels (int): default number of input channels in first Conv2d layer for encoder (usually 3) + +Methods: + + forward(self, x: torch.Tensor) + produce list of features of different spatial resolutions, each feature is a 4D torch.tensor of + shape NCHW (features should be sorted in descending order according to spatial resolution, starting + with resolution same as input `x` tensor). + + Input: `x` with shape (1, 3, 64, 64) + Output: [f0, f1, f2, f3, f4, f5] - features with corresponding shapes + [(1, 3, 64, 64), (1, 64, 32, 32), (1, 128, 16, 16), (1, 256, 8, 8), + (1, 512, 4, 4), (1, 1024, 2, 2)] (C - dim may differ) + + also should support number of features according to specified depth, e.g. if depth = 5, + number of feature tensors = 6 (one with same resolution as input and 5 downsampled), + depth = 3 -> number of feature tensors = 4 (one with same resolution as input and 3 downsampled). 
+""" + +import torchvision +import torch.nn as nn + +from ._base import EncoderMixin + + +class MobileNetV2Encoder(torchvision.models.MobileNetV2, EncoderMixin): + def __init__(self, out_channels, depth=5, **kwargs): + super().__init__(**kwargs) + self._depth = depth + self._out_channels = out_channels + self._in_channels = 3 + del self.classifier + + def get_stages(self): + return [ + nn.Identity(), + self.features[:2], + self.features[2:4], + self.features[4:7], + self.features[7:14], + self.features[14:], + ] + + def forward(self, x): + stages = self.get_stages() + + features = [] + for i in range(self._depth + 1): + x = stages[i](x) + features.append(x) + + return features + + def load_state_dict(self, state_dict, **kwargs): + state_dict.pop("classifier.1.bias", None) + state_dict.pop("classifier.1.weight", None) + super().load_state_dict(state_dict, **kwargs) + + +mobilenet_encoders = { + "mobilenet_v2": { + "encoder": MobileNetV2Encoder, + "pretrained_settings": { + "imagenet": { + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "url": "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth", + "input_space": "RGB", + "input_range": [0, 1], + } + }, + "params": {"out_channels": (3, 16, 24, 32, 96, 1280)}, + } +} diff --git a/feature_extractor_models/encoders/mobileone.py b/feature_extractor_models/encoders/mobileone.py new file mode 100644 index 0000000000000000000000000000000000000000..76f500536ca85449c9be7153c41a0c24bbf06f83 --- /dev/null +++ b/feature_extractor_models/encoders/mobileone.py @@ -0,0 +1,559 @@ +# +# For licensing see accompanying LICENSE file. +# Copyright (C) 2022 Apple Inc. All Rights Reserved. +# +import copy +from typing import List, Optional, Tuple + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from . import _utils as utils +from ._base import EncoderMixin + +__all__ = ["MobileOne", "reparameterize_model"] + + +class SEBlock(nn.Module): + """Squeeze and Excite module. + + Pytorch implementation of `Squeeze-and-Excitation Networks` - + https://arxiv.org/pdf/1709.01507.pdf + """ + + def __init__(self, in_channels: int, rd_ratio: float = 0.0625) -> None: + """Construct a Squeeze and Excite Module. + + :param in_channels: Number of input channels. + :param rd_ratio: Input channel reduction ratio. + """ + super(SEBlock, self).__init__() + self.reduce = nn.Conv2d( + in_channels=in_channels, + out_channels=int(in_channels * rd_ratio), + kernel_size=1, + stride=1, + bias=True, + ) + self.expand = nn.Conv2d( + in_channels=int(in_channels * rd_ratio), + out_channels=in_channels, + kernel_size=1, + stride=1, + bias=True, + ) + + def forward(self, inputs: torch.Tensor) -> torch.Tensor: + """Apply forward pass.""" + b, c, h, w = inputs.size() + x = F.avg_pool2d(inputs, kernel_size=[h, w]) + x = self.reduce(x) + x = F.relu(x) + x = self.expand(x) + x = torch.sigmoid(x) + x = x.view(-1, c, 1, 1) + return inputs * x + + +class MobileOneBlock(nn.Module): + """MobileOne building block. 
+ + This block has a multi-branched architecture at train-time + and plain-CNN style architecture at inference time + For more details, please refer to our paper: + `An Improved One millisecond Mobile Backbone` - + https://arxiv.org/pdf/2206.04040.pdf + """ + + def __init__( + self, + in_channels: int, + out_channels: int, + kernel_size: int, + stride: int = 1, + padding: int = 0, + dilation: int = 1, + groups: int = 1, + inference_mode: bool = False, + use_se: bool = False, + num_conv_branches: int = 1, + ) -> None: + """Construct a MobileOneBlock module. + + :param in_channels: Number of channels in the input. + :param out_channels: Number of channels produced by the block. + :param kernel_size: Size of the convolution kernel. + :param stride: Stride size. + :param padding: Zero-padding size. + :param dilation: Kernel dilation factor. + :param groups: Group number. + :param inference_mode: If True, instantiates model in inference mode. + :param use_se: Whether to use SE-ReLU activations. + :param num_conv_branches: Number of linear conv branches. + """ + super(MobileOneBlock, self).__init__() + self.inference_mode = inference_mode + self.groups = groups + self.stride = stride + self.kernel_size = kernel_size + self.in_channels = in_channels + self.out_channels = out_channels + self.num_conv_branches = num_conv_branches + + # Check if SE-ReLU is requested + if use_se: + self.se = SEBlock(out_channels) + else: + self.se = nn.Identity() + self.activation = nn.ReLU() + + if inference_mode: + self.reparam_conv = nn.Conv2d( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + groups=groups, + bias=True, + ) + else: + # Re-parameterizable skip connection + self.rbr_skip = ( + nn.BatchNorm2d(num_features=in_channels) + if out_channels == in_channels and stride == 1 + else None + ) + + # Re-parameterizable conv branches + rbr_conv = list() + for _ in range(self.num_conv_branches): + rbr_conv.append(self._conv_bn(kernel_size=kernel_size, padding=padding)) + self.rbr_conv = nn.ModuleList(rbr_conv) + + # Re-parameterizable scale branch + self.rbr_scale = None + if kernel_size > 1: + self.rbr_scale = self._conv_bn(kernel_size=1, padding=0) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + """Apply forward pass.""" + # Inference mode forward pass. + if self.inference_mode: + return self.activation(self.se(self.reparam_conv(x))) + + # Multi-branched train-time forward pass. + # Skip branch output + identity_out = 0 + if self.rbr_skip is not None: + identity_out = self.rbr_skip(x) + + # Scale branch output + scale_out = 0 + if self.rbr_scale is not None: + scale_out = self.rbr_scale(x) + + # Other branches + out = scale_out + identity_out + for ix in range(self.num_conv_branches): + out += self.rbr_conv[ix](x) + + return self.activation(self.se(out)) + + def reparameterize(self): + """Following works like `RepVGG: Making VGG-style ConvNets Great Again` - + https://arxiv.org/pdf/2101.03697.pdf. We re-parameterize multi-branched + architecture used at training time to obtain a plain CNN-like structure + for inference. 
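+
+        Concretely, `_fuse_bn_tensor` folds each conv+BN branch into a single
+        conv with W' = W * gamma / sqrt(var + eps) and
+        b' = beta - mean * gamma / sqrt(var + eps); the folded kernels and
+        biases of all branches are then summed into `reparam_conv`.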
+ """ + if self.inference_mode: + return + kernel, bias = self._get_kernel_bias() + self.reparam_conv = nn.Conv2d( + in_channels=self.rbr_conv[0].conv.in_channels, + out_channels=self.rbr_conv[0].conv.out_channels, + kernel_size=self.rbr_conv[0].conv.kernel_size, + stride=self.rbr_conv[0].conv.stride, + padding=self.rbr_conv[0].conv.padding, + dilation=self.rbr_conv[0].conv.dilation, + groups=self.rbr_conv[0].conv.groups, + bias=True, + ) + self.reparam_conv.weight.data = kernel + self.reparam_conv.bias.data = bias + + # Delete un-used branches + for para in self.parameters(): + para.detach_() + self.__delattr__("rbr_conv") + self.__delattr__("rbr_scale") + if hasattr(self, "rbr_skip"): + self.__delattr__("rbr_skip") + + self.inference_mode = True + + def _get_kernel_bias(self) -> Tuple[torch.Tensor, torch.Tensor]: + """Obtain the re-parameterized kernel and bias. + Reference: https://github.com/DingXiaoH/RepVGG/blob/main/repvgg.py#L83 + + :return: Tuple of (kernel, bias) after fusing branches. + """ + # get weights and bias of scale branch + kernel_scale = 0 + bias_scale = 0 + if self.rbr_scale is not None: + kernel_scale, bias_scale = self._fuse_bn_tensor(self.rbr_scale) + # Pad scale branch kernel to match conv branch kernel size. + pad = self.kernel_size // 2 + kernel_scale = torch.nn.functional.pad(kernel_scale, [pad, pad, pad, pad]) + + # get weights and bias of skip branch + kernel_identity = 0 + bias_identity = 0 + if self.rbr_skip is not None: + kernel_identity, bias_identity = self._fuse_bn_tensor(self.rbr_skip) + + # get weights and bias of conv branches + kernel_conv = 0 + bias_conv = 0 + for ix in range(self.num_conv_branches): + _kernel, _bias = self._fuse_bn_tensor(self.rbr_conv[ix]) + kernel_conv += _kernel + bias_conv += _bias + + kernel_final = kernel_conv + kernel_scale + kernel_identity + bias_final = bias_conv + bias_scale + bias_identity + return kernel_final, bias_final + + def _fuse_bn_tensor(self, branch) -> Tuple[torch.Tensor, torch.Tensor]: + """Fuse batchnorm layer with preceeding conv layer. + Reference: https://github.com/DingXiaoH/RepVGG/blob/main/repvgg.py#L95 + + :param branch: + :return: Tuple of (kernel, bias) after fusing batchnorm. + """ + if isinstance(branch, nn.Sequential): + kernel = branch.conv.weight + running_mean = branch.bn.running_mean + running_var = branch.bn.running_var + gamma = branch.bn.weight + beta = branch.bn.bias + eps = branch.bn.eps + else: + assert isinstance(branch, nn.BatchNorm2d) + if not hasattr(self, "id_tensor"): + input_dim = self.in_channels // self.groups + kernel_value = torch.zeros( + (self.in_channels, input_dim, self.kernel_size, self.kernel_size), + dtype=branch.weight.dtype, + device=branch.weight.device, + ) + for i in range(self.in_channels): + kernel_value[ + i, i % input_dim, self.kernel_size // 2, self.kernel_size // 2 + ] = 1 + self.id_tensor = kernel_value + kernel = self.id_tensor + running_mean = branch.running_mean + running_var = branch.running_var + gamma = branch.weight + beta = branch.bias + eps = branch.eps + std = (running_var + eps).sqrt() + t = (gamma / std).reshape(-1, 1, 1, 1) + return kernel * t, beta - running_mean * gamma / std + + def _conv_bn(self, kernel_size: int, padding: int) -> nn.Sequential: + """Construct conv-batchnorm layers. + + :param kernel_size: Size of the convolution kernel. + :param padding: Zero-padding size. + :return: Conv-BN module. 
+ """ + mod_list = nn.Sequential() + mod_list.add_module( + "conv", + nn.Conv2d( + in_channels=self.in_channels, + out_channels=self.out_channels, + kernel_size=kernel_size, + stride=self.stride, + padding=padding, + groups=self.groups, + bias=False, + ), + ) + mod_list.add_module("bn", nn.BatchNorm2d(num_features=self.out_channels)) + return mod_list + + +class MobileOne(nn.Module, EncoderMixin): + """MobileOne Model + + Pytorch implementation of `An Improved One millisecond Mobile Backbone` - + https://arxiv.org/pdf/2206.04040.pdf + """ + + def __init__( + self, + out_channels, + num_blocks_per_stage: List[int] = [2, 8, 10, 1], + width_multipliers: Optional[List[float]] = None, + inference_mode: bool = False, + use_se: bool = False, + depth=5, + in_channels=3, + num_conv_branches: int = 1, + ) -> None: + """Construct MobileOne model. + + :param num_blocks_per_stage: List of number of blocks per stage. + :param num_classes: Number of classes in the dataset. + :param width_multipliers: List of width multiplier for blocks in a stage. + :param inference_mode: If True, instantiates model in inference mode. + :param use_se: Whether to use SE-ReLU activations. + :param num_conv_branches: Number of linear conv branches. + """ + super().__init__() + + assert len(width_multipliers) == 4 + self.inference_mode = inference_mode + self._out_channels = out_channels + self.in_planes = min(64, int(64 * width_multipliers[0])) + self.use_se = use_se + self.num_conv_branches = num_conv_branches + self._depth = depth + self._in_channels = in_channels + self.set_in_channels(self._in_channels) + + # Build stages + self.stage0 = MobileOneBlock( + in_channels=self._in_channels, + out_channels=self.in_planes, + kernel_size=3, + stride=2, + padding=1, + inference_mode=self.inference_mode, + ) + self.cur_layer_idx = 1 + self.stage1 = self._make_stage( + int(64 * width_multipliers[0]), num_blocks_per_stage[0], num_se_blocks=0 + ) + self.stage2 = self._make_stage( + int(128 * width_multipliers[1]), num_blocks_per_stage[1], num_se_blocks=0 + ) + self.stage3 = self._make_stage( + int(256 * width_multipliers[2]), + num_blocks_per_stage[2], + num_se_blocks=int(num_blocks_per_stage[2] // 2) if use_se else 0, + ) + self.stage4 = self._make_stage( + int(512 * width_multipliers[3]), + num_blocks_per_stage[3], + num_se_blocks=num_blocks_per_stage[3] if use_se else 0, + ) + + def get_stages(self): + return [ + nn.Identity(), + self.stage0, + self.stage1, + self.stage2, + self.stage3, + self.stage4, + ] + + def _make_stage( + self, planes: int, num_blocks: int, num_se_blocks: int + ) -> nn.Sequential: + """Build a stage of MobileOne model. + + :param planes: Number of output channels. + :param num_blocks: Number of blocks in this stage. + :param num_se_blocks: Number of SE blocks in this stage. + :return: A stage of MobileOne model. + """ + # Get strides for all layers + strides = [2] + [1] * (num_blocks - 1) + blocks = [] + for ix, stride in enumerate(strides): + use_se = False + if num_se_blocks > num_blocks: + raise ValueError( + "Number of SE blocks cannot " "exceed number of layers." 
+ ) + if ix >= (num_blocks - num_se_blocks): + use_se = True + + # Depthwise conv + blocks.append( + MobileOneBlock( + in_channels=self.in_planes, + out_channels=self.in_planes, + kernel_size=3, + stride=stride, + padding=1, + groups=self.in_planes, + inference_mode=self.inference_mode, + use_se=use_se, + num_conv_branches=self.num_conv_branches, + ) + ) + # Pointwise conv + blocks.append( + MobileOneBlock( + in_channels=self.in_planes, + out_channels=planes, + kernel_size=1, + stride=1, + padding=0, + groups=1, + inference_mode=self.inference_mode, + use_se=use_se, + num_conv_branches=self.num_conv_branches, + ) + ) + self.in_planes = planes + self.cur_layer_idx += 1 + return nn.Sequential(*blocks) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + """Apply forward pass.""" + stages = self.get_stages() + features = [] + for i in range(self._depth + 1): + x = stages[i](x) + features.append(x) + return features + + def load_state_dict(self, state_dict, **kwargs): + state_dict.pop("linear.weight", None) + state_dict.pop("linear.bias", None) + super().load_state_dict(state_dict, **kwargs) + + def set_in_channels(self, in_channels, pretrained=True): + """Change first convolution channels""" + if in_channels == 3: + return + + self._in_channels = in_channels + self._out_channels = tuple([in_channels] + list(self._out_channels)[1:]) + utils.patch_first_conv( + model=self.stage0.rbr_conv, + new_in_channels=in_channels, + pretrained=pretrained, + ) + utils.patch_first_conv( + model=self.stage0.rbr_scale, + new_in_channels=in_channels, + pretrained=pretrained, + ) + + +def reparameterize_model(model: torch.nn.Module) -> nn.Module: + """Return a model where a multi-branched structure + used in training is re-parameterized into a single branch + for inference. + + :param model: MobileOne model in train mode. + :return: MobileOne model in inference mode. 
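+
+    Example (illustrative sketch; constructor arguments mirror the
+    "mobileone_s0" entry defined below):
+
+        model = MobileOne(
+            out_channels=(3, 48, 48, 128, 256, 1024),
+            width_multipliers=(0.75, 1.0, 1.0, 2.0),
+            num_conv_branches=4,
+        )
+        model.eval()
+        deploy_model = reparameterize_model(model)  # single-branch copy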
+ """ + # Avoid editing original graph + model = copy.deepcopy(model) + for module in model.modules(): + if hasattr(module, "reparameterize"): + module.reparameterize() + return model + + +mobileone_encoders = { + "mobileone_s0": { + "encoder": MobileOne, + "pretrained_settings": { + "imagenet": { + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "url": "https://docs-assets.developer.apple.com/ml-research/datasets/mobileone/mobileone_s0_unfused.pth.tar", # noqa + "input_space": "RGB", + "input_range": [0, 1], + } + }, + "params": { + "out_channels": (3, 48, 48, 128, 256, 1024), + "width_multipliers": (0.75, 1.0, 1.0, 2.0), + "num_conv_branches": 4, + "inference_mode": False, + }, + }, + "mobileone_s1": { + "encoder": MobileOne, + "pretrained_settings": { + "imagenet": { + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "url": "https://docs-assets.developer.apple.com/ml-research/datasets/mobileone/mobileone_s1_unfused.pth.tar", # noqa + "input_space": "RGB", + "input_range": [0, 1], + } + }, + "params": { + "out_channels": (3, 64, 96, 192, 512, 1280), + "width_multipliers": (1.5, 1.5, 2.0, 2.5), + "inference_mode": False, + }, + }, + "mobileone_s2": { + "encoder": MobileOne, + "pretrained_settings": { + "imagenet": { + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "url": "https://docs-assets.developer.apple.com/ml-research/datasets/mobileone/mobileone_s2_unfused.pth.tar", # noqa + "input_space": "RGB", + "input_range": [0, 1], + } + }, + "params": { + "out_channels": (3, 64, 96, 256, 640, 2048), + "width_multipliers": (1.5, 2.0, 2.5, 4.0), + "inference_mode": False, + }, + }, + "mobileone_s3": { + "encoder": MobileOne, + "pretrained_settings": { + "imagenet": { + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "url": "https://docs-assets.developer.apple.com/ml-research/datasets/mobileone/mobileone_s3_unfused.pth.tar", # noqa + "input_space": "RGB", + "input_range": [0, 1], + } + }, + "params": { + "out_channels": (3, 64, 128, 320, 768, 2048), + "width_multipliers": (2.0, 2.5, 3.0, 4.0), + "inference_mode": False, + }, + }, + "mobileone_s4": { + "encoder": MobileOne, + "pretrained_settings": { + "imagenet": { + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "url": "https://docs-assets.developer.apple.com/ml-research/datasets/mobileone/mobileone_s4_unfused.pth.tar", # noqa + "input_space": "RGB", + "input_range": [0, 1], + } + }, + "params": { + "out_channels": (3, 64, 192, 448, 896, 2048), + "width_multipliers": (3.0, 3.5, 3.5, 4.0), + "use_se": True, + "inference_mode": False, + }, + }, +} diff --git a/feature_extractor_models/encoders/resnet.py b/feature_extractor_models/encoders/resnet.py new file mode 100644 index 0000000000000000000000000000000000000000..2040a42c3e9e7bd9f53a1cafadfb53ef21523338 --- /dev/null +++ b/feature_extractor_models/encoders/resnet.py @@ -0,0 +1,239 @@ +"""Each encoder should have following attributes and methods and be inherited from `_base.EncoderMixin` + +Attributes: + + _out_channels (list of int): specify number of channels for each encoder feature tensor + _depth (int): specify number of stages in decoder (in other words number of downsampling operations) + _in_channels (int): default number of input channels in first Conv2d layer for encoder (usually 3) + +Methods: + + forward(self, x: torch.Tensor) + produce list of features of different spatial resolutions, each feature is a 4D torch.tensor of + shape NCHW (features should be sorted in descending order according to 
spatial resolution, starting + with resolution same as input `x` tensor). + + Input: `x` with shape (1, 3, 64, 64) + Output: [f0, f1, f2, f3, f4, f5] - features with corresponding shapes + [(1, 3, 64, 64), (1, 64, 32, 32), (1, 128, 16, 16), (1, 256, 8, 8), + (1, 512, 4, 4), (1, 1024, 2, 2)] (C - dim may differ) + + also should support number of features according to specified depth, e.g. if depth = 5, + number of feature tensors = 6 (one with same resolution as input and 5 downsampled), + depth = 3 -> number of feature tensors = 4 (one with same resolution as input and 3 downsampled). +""" + +from copy import deepcopy + +import torch.nn as nn + +from torchvision.models.resnet import ResNet +from torchvision.models.resnet import BasicBlock +from torchvision.models.resnet import Bottleneck +from pretrainedmodels.models.torchvision_models import pretrained_settings + +from ._base import EncoderMixin + + +class ResNetEncoder(ResNet, EncoderMixin): + def __init__(self, out_channels, depth=5, **kwargs): + super().__init__(**kwargs) + self._depth = depth + self._out_channels = out_channels + self._in_channels = 3 + + del self.fc + del self.avgpool + + def get_stages(self): + return [ + nn.Identity(), + nn.Sequential(self.conv1, self.bn1, self.relu), + nn.Sequential(self.maxpool, self.layer1), + self.layer2, + self.layer3, + self.layer4, + ] + + def forward(self, x): + stages = self.get_stages() + + features = [] + for i in range(self._depth + 1): + x = stages[i](x) + features.append(x) + + return features + + def load_state_dict(self, state_dict, **kwargs): + state_dict.pop("fc.bias", None) + state_dict.pop("fc.weight", None) + super().load_state_dict(state_dict, **kwargs) + + +new_settings = { + "resnet18": { + "ssl": "https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_supervised_resnet18-d92f0530.pth", # noqa + "swsl": "https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_weakly_supervised_resnet18-118f1556.pth", # noqa + }, + "resnet50": { + "ssl": "https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_supervised_resnet50-08389792.pth", # noqa + "swsl": "https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_weakly_supervised_resnet50-16a12f1b.pth", # noqa + }, + "resnext50_32x4d": { + "imagenet": "https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth", + "ssl": "https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_supervised_resnext50_32x4-ddb3e555.pth", # noqa + "swsl": "https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_weakly_supervised_resnext50_32x4-72679e44.pth", # noqa + }, + "resnext101_32x4d": { + "ssl": "https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_supervised_resnext101_32x4-dc43570a.pth", # noqa + "swsl": "https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_weakly_supervised_resnext101_32x4-3f87e46b.pth", # noqa + }, + "resnext101_32x8d": { + "imagenet": "https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth", + "instagram": "https://download.pytorch.org/models/ig_resnext101_32x8-c38310e5.pth", + "ssl": "https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_supervised_resnext101_32x8-2cfe2f8b.pth", # noqa + "swsl": "https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_weakly_supervised_resnext101_32x8-b4712904.pth", # noqa + }, + "resnext101_32x16d": { + "instagram": "https://download.pytorch.org/models/ig_resnext101_32x16-c6f796b0.pth", + "ssl": 
"https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_supervised_resnext101_32x16-15fffa57.pth", # noqa + "swsl": "https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_weakly_supervised_resnext101_32x16-f3559a9c.pth", # noqa + }, + "resnext101_32x32d": { + "instagram": "https://download.pytorch.org/models/ig_resnext101_32x32-e4b90b00.pth" + }, + "resnext101_32x48d": { + "instagram": "https://download.pytorch.org/models/ig_resnext101_32x48-3e41cc8a.pth" + }, +} + +pretrained_settings = deepcopy(pretrained_settings) +for model_name, sources in new_settings.items(): + if model_name not in pretrained_settings: + pretrained_settings[model_name] = {} + + for source_name, source_url in sources.items(): + pretrained_settings[model_name][source_name] = { + "url": source_url, + "input_size": [3, 224, 224], + "input_range": [0, 1], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "num_classes": 1000, + } + + +resnet_encoders = { + "resnet18": { + "encoder": ResNetEncoder, + "pretrained_settings": pretrained_settings["resnet18"], + "params": { + "out_channels": (3, 64, 64, 128, 256, 512), + "block": BasicBlock, + "layers": [2, 2, 2, 2], + }, + }, + "resnet34": { + "encoder": ResNetEncoder, + "pretrained_settings": pretrained_settings["resnet34"], + "params": { + "out_channels": (3, 64, 64, 128, 256, 512), + "block": BasicBlock, + "layers": [3, 4, 6, 3], + }, + }, + "resnet50": { + "encoder": ResNetEncoder, + "pretrained_settings": pretrained_settings["resnet50"], + "params": { + "out_channels": (3, 64, 256, 512, 1024, 2048), + "block": Bottleneck, + "layers": [3, 4, 6, 3], + }, + }, + "resnet101": { + "encoder": ResNetEncoder, + "pretrained_settings": pretrained_settings["resnet101"], + "params": { + "out_channels": (3, 64, 256, 512, 1024, 2048), + "block": Bottleneck, + "layers": [3, 4, 23, 3], + }, + }, + "resnet152": { + "encoder": ResNetEncoder, + "pretrained_settings": pretrained_settings["resnet152"], + "params": { + "out_channels": (3, 64, 256, 512, 1024, 2048), + "block": Bottleneck, + "layers": [3, 8, 36, 3], + }, + }, + "resnext50_32x4d": { + "encoder": ResNetEncoder, + "pretrained_settings": pretrained_settings["resnext50_32x4d"], + "params": { + "out_channels": (3, 64, 256, 512, 1024, 2048), + "block": Bottleneck, + "layers": [3, 4, 6, 3], + "groups": 32, + "width_per_group": 4, + }, + }, + "resnext101_32x4d": { + "encoder": ResNetEncoder, + "pretrained_settings": pretrained_settings["resnext101_32x4d"], + "params": { + "out_channels": (3, 64, 256, 512, 1024, 2048), + "block": Bottleneck, + "layers": [3, 4, 23, 3], + "groups": 32, + "width_per_group": 4, + }, + }, + "resnext101_32x8d": { + "encoder": ResNetEncoder, + "pretrained_settings": pretrained_settings["resnext101_32x8d"], + "params": { + "out_channels": (3, 64, 256, 512, 1024, 2048), + "block": Bottleneck, + "layers": [3, 4, 23, 3], + "groups": 32, + "width_per_group": 8, + }, + }, + "resnext101_32x16d": { + "encoder": ResNetEncoder, + "pretrained_settings": pretrained_settings["resnext101_32x16d"], + "params": { + "out_channels": (3, 64, 256, 512, 1024, 2048), + "block": Bottleneck, + "layers": [3, 4, 23, 3], + "groups": 32, + "width_per_group": 16, + }, + }, + "resnext101_32x32d": { + "encoder": ResNetEncoder, + "pretrained_settings": pretrained_settings["resnext101_32x32d"], + "params": { + "out_channels": (3, 64, 256, 512, 1024, 2048), + "block": Bottleneck, + "layers": [3, 4, 23, 3], + "groups": 32, + "width_per_group": 32, + }, + }, + "resnext101_32x48d": { + "encoder": 
ResNetEncoder, + "pretrained_settings": pretrained_settings["resnext101_32x48d"], + "params": { + "out_channels": (3, 64, 256, 512, 1024, 2048), + "block": Bottleneck, + "layers": [3, 4, 23, 3], + "groups": 32, + "width_per_group": 48, + }, + }, +} diff --git a/feature_extractor_models/encoders/senet.py b/feature_extractor_models/encoders/senet.py new file mode 100644 index 0000000000000000000000000000000000000000..8e0f6fd851f07a0eed93146d63c39dad250de151 --- /dev/null +++ b/feature_extractor_models/encoders/senet.py @@ -0,0 +1,174 @@ +"""Each encoder should have following attributes and methods and be inherited from `_base.EncoderMixin` + +Attributes: + + _out_channels (list of int): specify number of channels for each encoder feature tensor + _depth (int): specify number of stages in decoder (in other words number of downsampling operations) + _in_channels (int): default number of input channels in first Conv2d layer for encoder (usually 3) + +Methods: + + forward(self, x: torch.Tensor) + produce list of features of different spatial resolutions, each feature is a 4D torch.tensor of + shape NCHW (features should be sorted in descending order according to spatial resolution, starting + with resolution same as input `x` tensor). + + Input: `x` with shape (1, 3, 64, 64) + Output: [f0, f1, f2, f3, f4, f5] - features with corresponding shapes + [(1, 3, 64, 64), (1, 64, 32, 32), (1, 128, 16, 16), (1, 256, 8, 8), + (1, 512, 4, 4), (1, 1024, 2, 2)] (C - dim may differ) + + also should support number of features according to specified depth, e.g. if depth = 5, + number of feature tensors = 6 (one with same resolution as input and 5 downsampled), + depth = 3 -> number of feature tensors = 4 (one with same resolution as input and 3 downsampled). +""" + +import torch.nn as nn + +from pretrainedmodels.models.senet import ( + SENet, + SEBottleneck, + SEResNetBottleneck, + SEResNeXtBottleneck, + pretrained_settings, +) +from ._base import EncoderMixin + + +class SENetEncoder(SENet, EncoderMixin): + def __init__(self, out_channels, depth=5, **kwargs): + super().__init__(**kwargs) + + self._out_channels = out_channels + self._depth = depth + self._in_channels = 3 + + del self.last_linear + del self.avg_pool + + def get_stages(self): + return [ + nn.Identity(), + self.layer0[:-1], + nn.Sequential(self.layer0[-1], self.layer1), + self.layer2, + self.layer3, + self.layer4, + ] + + def forward(self, x): + stages = self.get_stages() + + features = [] + for i in range(self._depth + 1): + x = stages[i](x) + features.append(x) + + return features + + def load_state_dict(self, state_dict, **kwargs): + state_dict.pop("last_linear.bias", None) + state_dict.pop("last_linear.weight", None) + super().load_state_dict(state_dict, **kwargs) + + +senet_encoders = { + "senet154": { + "encoder": SENetEncoder, + "pretrained_settings": pretrained_settings["senet154"], + "params": { + "out_channels": (3, 128, 256, 512, 1024, 2048), + "block": SEBottleneck, + "dropout_p": 0.2, + "groups": 64, + "layers": [3, 8, 36, 3], + "num_classes": 1000, + "reduction": 16, + }, + }, + "se_resnet50": { + "encoder": SENetEncoder, + "pretrained_settings": pretrained_settings["se_resnet50"], + "params": { + "out_channels": (3, 64, 256, 512, 1024, 2048), + "block": SEResNetBottleneck, + "layers": [3, 4, 6, 3], + "downsample_kernel_size": 1, + "downsample_padding": 0, + "dropout_p": None, + "groups": 1, + "inplanes": 64, + "input_3x3": False, + "num_classes": 1000, + "reduction": 16, + }, + }, + "se_resnet101": { + "encoder": SENetEncoder, + 
"pretrained_settings": pretrained_settings["se_resnet101"], + "params": { + "out_channels": (3, 64, 256, 512, 1024, 2048), + "block": SEResNetBottleneck, + "layers": [3, 4, 23, 3], + "downsample_kernel_size": 1, + "downsample_padding": 0, + "dropout_p": None, + "groups": 1, + "inplanes": 64, + "input_3x3": False, + "num_classes": 1000, + "reduction": 16, + }, + }, + "se_resnet152": { + "encoder": SENetEncoder, + "pretrained_settings": pretrained_settings["se_resnet152"], + "params": { + "out_channels": (3, 64, 256, 512, 1024, 2048), + "block": SEResNetBottleneck, + "layers": [3, 8, 36, 3], + "downsample_kernel_size": 1, + "downsample_padding": 0, + "dropout_p": None, + "groups": 1, + "inplanes": 64, + "input_3x3": False, + "num_classes": 1000, + "reduction": 16, + }, + }, + "se_resnext50_32x4d": { + "encoder": SENetEncoder, + "pretrained_settings": pretrained_settings["se_resnext50_32x4d"], + "params": { + "out_channels": (3, 64, 256, 512, 1024, 2048), + "block": SEResNeXtBottleneck, + "layers": [3, 4, 6, 3], + "downsample_kernel_size": 1, + "downsample_padding": 0, + "dropout_p": None, + "groups": 32, + "inplanes": 64, + "input_3x3": False, + "num_classes": 1000, + "reduction": 16, + }, + }, + "se_resnext101_32x4d": { + "encoder": SENetEncoder, + "pretrained_settings": pretrained_settings["se_resnext101_32x4d"], + "params": { + "out_channels": (3, 64, 256, 512, 1024, 2048), + "block": SEResNeXtBottleneck, + "layers": [3, 4, 23, 3], + "downsample_kernel_size": 1, + "downsample_padding": 0, + "dropout_p": None, + "groups": 32, + "inplanes": 64, + "input_3x3": False, + "num_classes": 1000, + "reduction": 16, + }, + }, +} diff --git a/feature_extractor_models/encoders/timm_efficientnet.py b/feature_extractor_models/encoders/timm_efficientnet.py new file mode 100644 index 0000000000000000000000000000000000000000..fc248575a44254e2a09dc2f8e4f0f26b8a5f72d9 --- /dev/null +++ b/feature_extractor_models/encoders/timm_efficientnet.py @@ -0,0 +1,456 @@ +from functools import partial + +import torch.nn as nn + +from timm.models.efficientnet import EfficientNet +from timm.models.efficientnet import decode_arch_def, round_channels, default_cfgs +from timm.layers.activations import Swish + +from ._base import EncoderMixin + + +def get_efficientnet_kwargs( + channel_multiplier=1.0, depth_multiplier=1.0, drop_rate=0.2 +): + """Create EfficientNet model. 
+ Ref impl: https://github.com/tensorflow/tpu/blob/master/models/official/efficientnet/efficientnet_model.py + Paper: https://arxiv.org/abs/1905.11946 + EfficientNet params + name: (channel_multiplier, depth_multiplier, resolution, dropout_rate) + 'efficientnet-b0': (1.0, 1.0, 224, 0.2), + 'efficientnet-b1': (1.0, 1.1, 240, 0.2), + 'efficientnet-b2': (1.1, 1.2, 260, 0.3), + 'efficientnet-b3': (1.2, 1.4, 300, 0.3), + 'efficientnet-b4': (1.4, 1.8, 380, 0.4), + 'efficientnet-b5': (1.6, 2.2, 456, 0.4), + 'efficientnet-b6': (1.8, 2.6, 528, 0.5), + 'efficientnet-b7': (2.0, 3.1, 600, 0.5), + 'efficientnet-b8': (2.2, 3.6, 672, 0.5), + 'efficientnet-l2': (4.3, 5.3, 800, 0.5), + Args: + channel_multiplier: multiplier to number of channels per layer + depth_multiplier: multiplier to number of repeats per stage + """ + arch_def = [ + ["ds_r1_k3_s1_e1_c16_se0.25"], + ["ir_r2_k3_s2_e6_c24_se0.25"], + ["ir_r2_k5_s2_e6_c40_se0.25"], + ["ir_r3_k3_s2_e6_c80_se0.25"], + ["ir_r3_k5_s1_e6_c112_se0.25"], + ["ir_r4_k5_s2_e6_c192_se0.25"], + ["ir_r1_k3_s1_e6_c320_se0.25"], + ] + model_kwargs = dict( + block_args=decode_arch_def(arch_def, depth_multiplier), + num_features=round_channels(1280, channel_multiplier, 8, None), + stem_size=32, + round_chs_fn=partial(round_channels, multiplier=channel_multiplier), + act_layer=Swish, + drop_rate=drop_rate, + drop_path_rate=0.2, + ) + return model_kwargs + + +def gen_efficientnet_lite_kwargs( + channel_multiplier=1.0, depth_multiplier=1.0, drop_rate=0.2 +): + """EfficientNet-Lite model. + + Ref impl: https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet/lite + Paper: https://arxiv.org/abs/1905.11946 + + EfficientNet params + name: (channel_multiplier, depth_multiplier, resolution, dropout_rate) + 'efficientnet-lite0': (1.0, 1.0, 224, 0.2), + 'efficientnet-lite1': (1.0, 1.1, 240, 0.2), + 'efficientnet-lite2': (1.1, 1.2, 260, 0.3), + 'efficientnet-lite3': (1.2, 1.4, 280, 0.3), + 'efficientnet-lite4': (1.4, 1.8, 300, 0.3), + + Args: + channel_multiplier: multiplier to number of channels per layer + depth_multiplier: multiplier to number of repeats per stage + """ + arch_def = [ + ["ds_r1_k3_s1_e1_c16"], + ["ir_r2_k3_s2_e6_c24"], + ["ir_r2_k5_s2_e6_c40"], + ["ir_r3_k3_s2_e6_c80"], + ["ir_r3_k5_s1_e6_c112"], + ["ir_r4_k5_s2_e6_c192"], + ["ir_r1_k3_s1_e6_c320"], + ] + model_kwargs = dict( + block_args=decode_arch_def(arch_def, depth_multiplier, fix_first_last=True), + num_features=1280, + stem_size=32, + fix_stem=True, + round_chs_fn=partial(round_channels, multiplier=channel_multiplier), + act_layer=nn.ReLU6, + drop_rate=drop_rate, + drop_path_rate=0.2, + ) + return model_kwargs + + +class EfficientNetBaseEncoder(EfficientNet, EncoderMixin): + def __init__(self, stage_idxs, out_channels, depth=5, **kwargs): + super().__init__(**kwargs) + + self._stage_idxs = stage_idxs + self._out_channels = out_channels + self._depth = depth + self._in_channels = 3 + + del self.classifier + + def get_stages(self): + return [ + nn.Identity(), + nn.Sequential(self.conv_stem, self.bn1), + self.blocks[: self._stage_idxs[0]], + self.blocks[self._stage_idxs[0] : self._stage_idxs[1]], + self.blocks[self._stage_idxs[1] : self._stage_idxs[2]], + self.blocks[self._stage_idxs[2] :], + ] + + def forward(self, x): + stages = self.get_stages() + + features = [] + for i in range(self._depth + 1): + x = stages[i](x) + features.append(x) + + return features + + def load_state_dict(self, state_dict, **kwargs): + state_dict.pop("classifier.bias", None) + state_dict.pop("classifier.weight", 
None) + super().load_state_dict(state_dict, **kwargs) + + +class EfficientNetEncoder(EfficientNetBaseEncoder): + def __init__( + self, + stage_idxs, + out_channels, + depth=5, + channel_multiplier=1.0, + depth_multiplier=1.0, + drop_rate=0.2, + ): + kwargs = get_efficientnet_kwargs( + channel_multiplier, depth_multiplier, drop_rate + ) + super().__init__(stage_idxs, out_channels, depth, **kwargs) + + +class EfficientNetLiteEncoder(EfficientNetBaseEncoder): + def __init__( + self, + stage_idxs, + out_channels, + depth=5, + channel_multiplier=1.0, + depth_multiplier=1.0, + drop_rate=0.2, + ): + kwargs = gen_efficientnet_lite_kwargs( + channel_multiplier, depth_multiplier, drop_rate + ) + super().__init__(stage_idxs, out_channels, depth, **kwargs) + + +def prepare_settings(settings): + return { + "mean": settings.mean, + "std": settings.std, + "url": settings.url, + "input_range": (0, 1), + "input_space": "RGB", + } + + +timm_efficientnet_encoders = { + "timm-efficientnet-b0": { + "encoder": EfficientNetEncoder, + "pretrained_settings": { + "imagenet": prepare_settings( + default_cfgs["tf_efficientnet_b0"].cfgs["in1k"] + ), + "advprop": prepare_settings( + default_cfgs["tf_efficientnet_b0"].cfgs["ap_in1k"] + ), + "noisy-student": prepare_settings( + default_cfgs["tf_efficientnet_b0"].cfgs["ns_jft_in1k"] + ), + }, + "params": { + "out_channels": (3, 32, 24, 40, 112, 320), + "stage_idxs": (2, 3, 5), + "channel_multiplier": 1.0, + "depth_multiplier": 1.0, + "drop_rate": 0.2, + }, + }, + "timm-efficientnet-b1": { + "encoder": EfficientNetEncoder, + "pretrained_settings": { + "imagenet": prepare_settings( + default_cfgs["tf_efficientnet_b1"].cfgs["in1k"] + ), + "advprop": prepare_settings( + default_cfgs["tf_efficientnet_b1"].cfgs["ap_in1k"] + ), + "noisy-student": prepare_settings( + default_cfgs["tf_efficientnet_b1"].cfgs["ns_jft_in1k"] + ), + }, + "params": { + "out_channels": (3, 32, 24, 40, 112, 320), + "stage_idxs": (2, 3, 5), + "channel_multiplier": 1.0, + "depth_multiplier": 1.1, + "drop_rate": 0.2, + }, + }, + "timm-efficientnet-b2": { + "encoder": EfficientNetEncoder, + "pretrained_settings": { + "imagenet": prepare_settings( + default_cfgs["tf_efficientnet_b2"].cfgs["in1k"] + ), + "advprop": prepare_settings( + default_cfgs["tf_efficientnet_b2"].cfgs["ap_in1k"] + ), + "noisy-student": prepare_settings( + default_cfgs["tf_efficientnet_b2"].cfgs["ns_jft_in1k"] + ), + }, + "params": { + "out_channels": (3, 32, 24, 48, 120, 352), + "stage_idxs": (2, 3, 5), + "channel_multiplier": 1.1, + "depth_multiplier": 1.2, + "drop_rate": 0.3, + }, + }, + "timm-efficientnet-b3": { + "encoder": EfficientNetEncoder, + "pretrained_settings": { + "imagenet": prepare_settings( + default_cfgs["tf_efficientnet_b3"].cfgs["in1k"] + ), + "advprop": prepare_settings( + default_cfgs["tf_efficientnet_b3"].cfgs["ap_in1k"] + ), + "noisy-student": prepare_settings( + default_cfgs["tf_efficientnet_b3"].cfgs["ns_jft_in1k"] + ), + }, + "params": { + "out_channels": (3, 40, 32, 48, 136, 384), + "stage_idxs": (2, 3, 5), + "channel_multiplier": 1.2, + "depth_multiplier": 1.4, + "drop_rate": 0.3, + }, + }, + "timm-efficientnet-b4": { + "encoder": EfficientNetEncoder, + "pretrained_settings": { + "imagenet": prepare_settings( + default_cfgs["tf_efficientnet_b4"].cfgs["in1k"] + ), + "advprop": prepare_settings( + default_cfgs["tf_efficientnet_b4"].cfgs["ap_in1k"] + ), + "noisy-student": prepare_settings( + default_cfgs["tf_efficientnet_b4"].cfgs["ns_jft_in1k"] + ), + }, + "params": { + "out_channels": (3, 48, 32, 56, 160, 448), 
+ "stage_idxs": (2, 3, 5), + "channel_multiplier": 1.4, + "depth_multiplier": 1.8, + "drop_rate": 0.4, + }, + }, + "timm-efficientnet-b5": { + "encoder": EfficientNetEncoder, + "pretrained_settings": { + "imagenet": prepare_settings( + default_cfgs["tf_efficientnet_b5"].cfgs["in1k"] + ), + "advprop": prepare_settings( + default_cfgs["tf_efficientnet_b5"].cfgs["ap_in1k"] + ), + "noisy-student": prepare_settings( + default_cfgs["tf_efficientnet_b5"].cfgs["ns_jft_in1k"] + ), + }, + "params": { + "out_channels": (3, 48, 40, 64, 176, 512), + "stage_idxs": (2, 3, 5), + "channel_multiplier": 1.6, + "depth_multiplier": 2.2, + "drop_rate": 0.4, + }, + }, + "timm-efficientnet-b6": { + "encoder": EfficientNetEncoder, + "pretrained_settings": { + "imagenet": prepare_settings( + default_cfgs["tf_efficientnet_b6"].cfgs["aa_in1k"] + ), + "advprop": prepare_settings( + default_cfgs["tf_efficientnet_b6"].cfgs["ap_in1k"] + ), + "noisy-student": prepare_settings( + default_cfgs["tf_efficientnet_b6"].cfgs["ns_jft_in1k"] + ), + }, + "params": { + "out_channels": (3, 56, 40, 72, 200, 576), + "stage_idxs": (2, 3, 5), + "channel_multiplier": 1.8, + "depth_multiplier": 2.6, + "drop_rate": 0.5, + }, + }, + "timm-efficientnet-b7": { + "encoder": EfficientNetEncoder, + "pretrained_settings": { + "imagenet": prepare_settings( + default_cfgs["tf_efficientnet_b7"].cfgs["aa_in1k"] + ), + "advprop": prepare_settings( + default_cfgs["tf_efficientnet_b7"].cfgs["ap_in1k"] + ), + "noisy-student": prepare_settings( + default_cfgs["tf_efficientnet_b7"].cfgs["ns_jft_in1k"] + ), + }, + "params": { + "out_channels": (3, 64, 48, 80, 224, 640), + "stage_idxs": (2, 3, 5), + "channel_multiplier": 2.0, + "depth_multiplier": 3.1, + "drop_rate": 0.5, + }, + }, + "timm-efficientnet-b8": { + "encoder": EfficientNetEncoder, + "pretrained_settings": { + "imagenet": prepare_settings( + default_cfgs["tf_efficientnet_b8"].cfgs["ra_in1k"] + ), + "advprop": prepare_settings( + default_cfgs["tf_efficientnet_b8"].cfgs["ap_in1k"] + ), + }, + "params": { + "out_channels": (3, 72, 56, 88, 248, 704), + "stage_idxs": (2, 3, 5), + "channel_multiplier": 2.2, + "depth_multiplier": 3.6, + "drop_rate": 0.5, + }, + }, + "timm-efficientnet-l2": { + "encoder": EfficientNetEncoder, + "pretrained_settings": { + "noisy-student": prepare_settings( + default_cfgs["tf_efficientnet_l2"].cfgs["ns_jft_in1k"] + ), + "noisy-student-475": prepare_settings( + default_cfgs["tf_efficientnet_l2"].cfgs["ns_jft_in1k_475"] + ), + }, + "params": { + "out_channels": (3, 136, 104, 176, 480, 1376), + "stage_idxs": (2, 3, 5), + "channel_multiplier": 4.3, + "depth_multiplier": 5.3, + "drop_rate": 0.5, + }, + }, + "timm-tf_efficientnet_lite0": { + "encoder": EfficientNetLiteEncoder, + "pretrained_settings": { + "imagenet": prepare_settings( + default_cfgs["tf_efficientnet_lite0"].cfgs["in1k"] + ) + }, + "params": { + "out_channels": (3, 32, 24, 40, 112, 320), + "stage_idxs": (2, 3, 5), + "channel_multiplier": 1.0, + "depth_multiplier": 1.0, + "drop_rate": 0.2, + }, + }, + "timm-tf_efficientnet_lite1": { + "encoder": EfficientNetLiteEncoder, + "pretrained_settings": { + "imagenet": prepare_settings( + default_cfgs["tf_efficientnet_lite1"].cfgs["in1k"] + ) + }, + "params": { + "out_channels": (3, 32, 24, 40, 112, 320), + "stage_idxs": (2, 3, 5), + "channel_multiplier": 1.0, + "depth_multiplier": 1.1, + "drop_rate": 0.2, + }, + }, + "timm-tf_efficientnet_lite2": { + "encoder": EfficientNetLiteEncoder, + "pretrained_settings": { + "imagenet": prepare_settings( + 
default_cfgs["tf_efficientnet_lite2"].cfgs["in1k"] + ) + }, + "params": { + "out_channels": (3, 32, 24, 48, 120, 352), + "stage_idxs": (2, 3, 5), + "channel_multiplier": 1.1, + "depth_multiplier": 1.2, + "drop_rate": 0.3, + }, + }, + "timm-tf_efficientnet_lite3": { + "encoder": EfficientNetLiteEncoder, + "pretrained_settings": { + "imagenet": prepare_settings( + default_cfgs["tf_efficientnet_lite3"].cfgs["in1k"] + ) + }, + "params": { + "out_channels": (3, 32, 32, 48, 136, 384), + "stage_idxs": (2, 3, 5), + "channel_multiplier": 1.2, + "depth_multiplier": 1.4, + "drop_rate": 0.3, + }, + }, + "timm-tf_efficientnet_lite4": { + "encoder": EfficientNetLiteEncoder, + "pretrained_settings": { + "imagenet": prepare_settings( + default_cfgs["tf_efficientnet_lite4"].cfgs["in1k"] + ) + }, + "params": { + "out_channels": (3, 32, 32, 56, 160, 448), + "stage_idxs": (2, 3, 5), + "channel_multiplier": 1.4, + "depth_multiplier": 1.8, + "drop_rate": 0.4, + }, + }, +} diff --git a/feature_extractor_models/encoders/timm_gernet.py b/feature_extractor_models/encoders/timm_gernet.py new file mode 100644 index 0000000000000000000000000000000000000000..e0c3354d8043ea5a6e69053050485fcf0f66f2b1 --- /dev/null +++ b/feature_extractor_models/encoders/timm_gernet.py @@ -0,0 +1,124 @@ +from timm.models import ByoModelCfg, ByoBlockCfg, ByobNet + +from ._base import EncoderMixin +import torch.nn as nn + + +class GERNetEncoder(ByobNet, EncoderMixin): + def __init__(self, out_channels, depth=5, **kwargs): + super().__init__(**kwargs) + self._depth = depth + self._out_channels = out_channels + self._in_channels = 3 + + del self.head + + def get_stages(self): + return [ + nn.Identity(), + self.stem, + self.stages[0], + self.stages[1], + self.stages[2], + nn.Sequential(self.stages[3], self.stages[4], self.final_conv), + ] + + def forward(self, x): + stages = self.get_stages() + + features = [] + for i in range(self._depth + 1): + x = stages[i](x) + features.append(x) + + return features + + def load_state_dict(self, state_dict, **kwargs): + state_dict.pop("head.fc.weight", None) + state_dict.pop("head.fc.bias", None) + super().load_state_dict(state_dict, **kwargs) + + +regnet_weights = { + "timm-gernet_s": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-ger-weights/gernet_s-756b4751.pth" # noqa + }, + "timm-gernet_m": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-ger-weights/gernet_m-0873c53a.pth" # noqa + }, + "timm-gernet_l": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-ger-weights/gernet_l-f31e2e8d.pth" # noqa + }, +} + +pretrained_settings = {} +for model_name, sources in regnet_weights.items(): + pretrained_settings[model_name] = {} + for source_name, source_url in sources.items(): + pretrained_settings[model_name][source_name] = { + "url": source_url, + "input_range": [0, 1], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "num_classes": 1000, + } + +timm_gernet_encoders = { + "timm-gernet_s": { + "encoder": GERNetEncoder, + "pretrained_settings": pretrained_settings["timm-gernet_s"], + "params": { + "out_channels": (3, 13, 48, 48, 384, 1920), + "cfg": ByoModelCfg( + blocks=( + ByoBlockCfg(type="basic", d=1, c=48, s=2, gs=0, br=1.0), + ByoBlockCfg(type="basic", d=3, c=48, s=2, gs=0, br=1.0), + ByoBlockCfg(type="bottle", d=7, c=384, s=2, gs=0, br=1 / 4), + ByoBlockCfg(type="bottle", d=2, c=560, s=2, gs=1, br=3.0), + ByoBlockCfg(type="bottle", d=1, c=256, s=1, gs=1, br=3.0), 
+ ), + stem_chs=13, + stem_pool=None, + num_features=1920, + ), + }, + }, + "timm-gernet_m": { + "encoder": GERNetEncoder, + "pretrained_settings": pretrained_settings["timm-gernet_m"], + "params": { + "out_channels": (3, 32, 128, 192, 640, 2560), + "cfg": ByoModelCfg( + blocks=( + ByoBlockCfg(type="basic", d=1, c=128, s=2, gs=0, br=1.0), + ByoBlockCfg(type="basic", d=2, c=192, s=2, gs=0, br=1.0), + ByoBlockCfg(type="bottle", d=6, c=640, s=2, gs=0, br=1 / 4), + ByoBlockCfg(type="bottle", d=4, c=640, s=2, gs=1, br=3.0), + ByoBlockCfg(type="bottle", d=1, c=640, s=1, gs=1, br=3.0), + ), + stem_chs=32, + stem_pool=None, + num_features=2560, + ), + }, + }, + "timm-gernet_l": { + "encoder": GERNetEncoder, + "pretrained_settings": pretrained_settings["timm-gernet_l"], + "params": { + "out_channels": (3, 32, 128, 192, 640, 2560), + "cfg": ByoModelCfg( + blocks=( + ByoBlockCfg(type="basic", d=1, c=128, s=2, gs=0, br=1.0), + ByoBlockCfg(type="basic", d=2, c=192, s=2, gs=0, br=1.0), + ByoBlockCfg(type="bottle", d=6, c=640, s=2, gs=0, br=1 / 4), + ByoBlockCfg(type="bottle", d=5, c=640, s=2, gs=1, br=3.0), + ByoBlockCfg(type="bottle", d=4, c=640, s=1, gs=1, br=3.0), + ), + stem_chs=32, + stem_pool=None, + num_features=2560, + ), + }, + }, +} diff --git a/feature_extractor_models/encoders/timm_mobilenetv3.py b/feature_extractor_models/encoders/timm_mobilenetv3.py new file mode 100644 index 0000000000000000000000000000000000000000..ff733ab9c6dd03c8e4a887b46238b68901231e59 --- /dev/null +++ b/feature_extractor_models/encoders/timm_mobilenetv3.py @@ -0,0 +1,151 @@ +import timm +import numpy as np +import torch.nn as nn + +from ._base import EncoderMixin + + +def _make_divisible(x, divisible_by=8): + return int(np.ceil(x * 1.0 / divisible_by) * divisible_by) + + +class MobileNetV3Encoder(nn.Module, EncoderMixin): + def __init__(self, model_name, width_mult, depth=5, **kwargs): + super().__init__() + if "large" not in model_name and "small" not in model_name: + raise ValueError("MobileNetV3 wrong model name {}".format(model_name)) + + self._mode = "small" if "small" in model_name else "large" + self._depth = depth + self._out_channels = self._get_channels(self._mode, width_mult) + self._in_channels = 3 + + # minimal models replace hardswish with relu + self.model = timm.create_model( + model_name=model_name, + scriptable=True, # torch.jit scriptable + exportable=True, # onnx export + features_only=True, + ) + + def _get_channels(self, mode, width_mult): + if mode == "small": + channels = [16, 16, 24, 48, 576] + else: + channels = [16, 24, 40, 112, 960] + channels = [3] + [_make_divisible(x * width_mult) for x in channels] + return tuple(channels) + + def get_stages(self): + if self._mode == "small": + return [ + nn.Identity(), + nn.Sequential(self.model.conv_stem, self.model.bn1, self.model.act1), + self.model.blocks[0], + self.model.blocks[1], + self.model.blocks[2:4], + self.model.blocks[4:], + ] + elif self._mode == "large": + return [ + nn.Identity(), + nn.Sequential( + self.model.conv_stem, + self.model.bn1, + self.model.act1, + self.model.blocks[0], + ), + self.model.blocks[1], + self.model.blocks[2], + self.model.blocks[3:5], + self.model.blocks[5:], + ] + else: + ValueError( + "MobileNetV3 mode should be small or large, got {}".format(self._mode) + ) + + def forward(self, x): + stages = self.get_stages() + + features = [] + for i in range(self._depth + 1): + x = stages[i](x) + features.append(x) + + return features + + def load_state_dict(self, state_dict, **kwargs): + 
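+ # The ImageNet checkpoints carry a classification head (conv_head / classifier) that this + # features-only wrapper does not keep, so those keys are dropped before loading into self.model.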
state_dict.pop("conv_head.weight", None) + state_dict.pop("conv_head.bias", None) + state_dict.pop("classifier.weight", None) + state_dict.pop("classifier.bias", None) + self.model.load_state_dict(state_dict, **kwargs) + + +mobilenetv3_weights = { + "tf_mobilenetv3_large_075": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_mobilenetv3_large_075-150ee8b0.pth" # noqa + }, + "tf_mobilenetv3_large_100": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_mobilenetv3_large_100-427764d5.pth" # noqa + }, + "tf_mobilenetv3_large_minimal_100": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_mobilenetv3_large_minimal_100-8596ae28.pth" # noqa + }, + "tf_mobilenetv3_small_075": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_mobilenetv3_small_075-da427f52.pth" # noqa + }, + "tf_mobilenetv3_small_100": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_mobilenetv3_small_100-37f49e2b.pth" # noqa + }, + "tf_mobilenetv3_small_minimal_100": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_mobilenetv3_small_minimal_100-922a7843.pth" # noqa + }, +} + +pretrained_settings = {} +for model_name, sources in mobilenetv3_weights.items(): + pretrained_settings[model_name] = {} + for source_name, source_url in sources.items(): + pretrained_settings[model_name][source_name] = { + "url": source_url, + "input_range": [0, 1], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "input_space": "RGB", + } + + +timm_mobilenetv3_encoders = { + "timm-mobilenetv3_large_075": { + "encoder": MobileNetV3Encoder, + "pretrained_settings": pretrained_settings["tf_mobilenetv3_large_075"], + "params": {"model_name": "tf_mobilenetv3_large_075", "width_mult": 0.75}, + }, + "timm-mobilenetv3_large_100": { + "encoder": MobileNetV3Encoder, + "pretrained_settings": pretrained_settings["tf_mobilenetv3_large_100"], + "params": {"model_name": "tf_mobilenetv3_large_100", "width_mult": 1.0}, + }, + "timm-mobilenetv3_large_minimal_100": { + "encoder": MobileNetV3Encoder, + "pretrained_settings": pretrained_settings["tf_mobilenetv3_large_minimal_100"], + "params": {"model_name": "tf_mobilenetv3_large_minimal_100", "width_mult": 1.0}, + }, + "timm-mobilenetv3_small_075": { + "encoder": MobileNetV3Encoder, + "pretrained_settings": pretrained_settings["tf_mobilenetv3_small_075"], + "params": {"model_name": "tf_mobilenetv3_small_075", "width_mult": 0.75}, + }, + "timm-mobilenetv3_small_100": { + "encoder": MobileNetV3Encoder, + "pretrained_settings": pretrained_settings["tf_mobilenetv3_small_100"], + "params": {"model_name": "tf_mobilenetv3_small_100", "width_mult": 1.0}, + }, + "timm-mobilenetv3_small_minimal_100": { + "encoder": MobileNetV3Encoder, + "pretrained_settings": pretrained_settings["tf_mobilenetv3_small_minimal_100"], + "params": {"model_name": "tf_mobilenetv3_small_minimal_100", "width_mult": 1.0}, + }, +} diff --git a/feature_extractor_models/encoders/timm_regnet.py b/feature_extractor_models/encoders/timm_regnet.py new file mode 100644 index 0000000000000000000000000000000000000000..cc60b8ba9c64d7ed7ebfefc66a7ad296c74c11e9 --- /dev/null +++ b/feature_extractor_models/encoders/timm_regnet.py @@ -0,0 +1,350 @@ +from ._base import EncoderMixin +from timm.models.regnet import RegNet, RegNetCfg +import torch.nn as nn + + 
+class RegNetEncoder(RegNet, EncoderMixin): + def __init__(self, out_channels, depth=5, **kwargs): + kwargs["cfg"] = RegNetCfg(**kwargs["cfg"]) + super().__init__(**kwargs) + self._depth = depth + self._out_channels = out_channels + self._in_channels = 3 + + del self.head + + def get_stages(self): + return [nn.Identity(), self.stem, self.s1, self.s2, self.s3, self.s4] + + def forward(self, x): + stages = self.get_stages() + + features = [] + for i in range(self._depth + 1): + x = stages[i](x) + features.append(x) + + return features + + def load_state_dict(self, state_dict, **kwargs): + state_dict.pop("head.fc.weight", None) + state_dict.pop("head.fc.bias", None) + super().load_state_dict(state_dict, **kwargs) + + +regnet_weights = { + "timm-regnetx_002": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_002-e7e85e5c.pth" # noqa + }, + "timm-regnetx_004": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_004-7d0e9424.pth" # noqa + }, + "timm-regnetx_006": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_006-85ec1baa.pth" # noqa + }, + "timm-regnetx_008": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_008-d8b470eb.pth" # noqa + }, + "timm-regnetx_016": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_016-65ca972a.pth" # noqa + }, + "timm-regnetx_032": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_032-ed0c7f7e.pth" # noqa + }, + "timm-regnetx_040": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_040-73c2a654.pth" # noqa + }, + "timm-regnetx_064": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_064-29278baa.pth" # noqa + }, + "timm-regnetx_080": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_080-7c7fcab1.pth" # noqa + }, + "timm-regnetx_120": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_120-65d5521e.pth" # noqa + }, + "timm-regnetx_160": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_160-c98c4112.pth" # noqa + }, + "timm-regnetx_320": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_320-8ea38b93.pth" # noqa + }, + "timm-regnety_002": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_002-e68ca334.pth" # noqa + }, + "timm-regnety_004": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_004-0db870e6.pth" # noqa + }, + "timm-regnety_006": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_006-c67e57ec.pth" # noqa + }, + "timm-regnety_008": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_008-dc900dbe.pth" # noqa + }, + "timm-regnety_016": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_016-54367f74.pth" # noqa + }, + "timm-regnety_032": { + "imagenet": 
"https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/regnety_032_ra-7f2439f9.pth" # noqa + }, + "timm-regnety_040": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_040-f0d569f9.pth" # noqa + }, + "timm-regnety_064": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_064-0a48325c.pth" # noqa + }, + "timm-regnety_080": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_080-e7f3eb93.pth" # noqa + }, + "timm-regnety_120": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_120-721ba79a.pth" # noqa + }, + "timm-regnety_160": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_160-d64013cd.pth" # noqa + }, + "timm-regnety_320": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_320-ba464b29.pth" # noqa + }, +} + +pretrained_settings = {} +for model_name, sources in regnet_weights.items(): + pretrained_settings[model_name] = {} + for source_name, source_url in sources.items(): + pretrained_settings[model_name][source_name] = { + "url": source_url, + "input_size": [3, 224, 224], + "input_range": [0, 1], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "num_classes": 1000, + } + +# at this point I am too lazy to copy configs, so I just used the same configs from timm's repo + + +def _mcfg(**kwargs): + cfg = dict(se_ratio=0.0, bottle_ratio=1.0, stem_width=32) + cfg.update(**kwargs) + return cfg + + +timm_regnet_encoders = { + "timm-regnetx_002": { + "encoder": RegNetEncoder, + "pretrained_settings": pretrained_settings["timm-regnetx_002"], + "params": { + "out_channels": (3, 32, 24, 56, 152, 368), + "cfg": _mcfg(w0=24, wa=36.44, wm=2.49, group_size=8, depth=13), + }, + }, + "timm-regnetx_004": { + "encoder": RegNetEncoder, + "pretrained_settings": pretrained_settings["timm-regnetx_004"], + "params": { + "out_channels": (3, 32, 32, 64, 160, 384), + "cfg": _mcfg(w0=24, wa=24.48, wm=2.54, group_size=16, depth=22), + }, + }, + "timm-regnetx_006": { + "encoder": RegNetEncoder, + "pretrained_settings": pretrained_settings["timm-regnetx_006"], + "params": { + "out_channels": (3, 32, 48, 96, 240, 528), + "cfg": _mcfg(w0=48, wa=36.97, wm=2.24, group_size=24, depth=16), + }, + }, + "timm-regnetx_008": { + "encoder": RegNetEncoder, + "pretrained_settings": pretrained_settings["timm-regnetx_008"], + "params": { + "out_channels": (3, 32, 64, 128, 288, 672), + "cfg": _mcfg(w0=56, wa=35.73, wm=2.28, group_size=16, depth=16), + }, + }, + "timm-regnetx_016": { + "encoder": RegNetEncoder, + "pretrained_settings": pretrained_settings["timm-regnetx_016"], + "params": { + "out_channels": (3, 32, 72, 168, 408, 912), + "cfg": _mcfg(w0=80, wa=34.01, wm=2.25, group_size=24, depth=18), + }, + }, + "timm-regnetx_032": { + "encoder": RegNetEncoder, + "pretrained_settings": pretrained_settings["timm-regnetx_032"], + "params": { + "out_channels": (3, 32, 96, 192, 432, 1008), + "cfg": _mcfg(w0=88, wa=26.31, wm=2.25, group_size=48, depth=25), + }, + }, + "timm-regnetx_040": { + "encoder": RegNetEncoder, + "pretrained_settings": pretrained_settings["timm-regnetx_040"], + "params": { + "out_channels": (3, 32, 80, 240, 560, 1360), + "cfg": _mcfg(w0=96, wa=38.65, wm=2.43, group_size=40, depth=23), + }, + }, + "timm-regnetx_064": { + "encoder": RegNetEncoder, + 
"pretrained_settings": pretrained_settings["timm-regnetx_064"], + "params": { + "out_channels": (3, 32, 168, 392, 784, 1624), + "cfg": _mcfg(w0=184, wa=60.83, wm=2.07, group_size=56, depth=17), + }, + }, + "timm-regnetx_080": { + "encoder": RegNetEncoder, + "pretrained_settings": pretrained_settings["timm-regnetx_080"], + "params": { + "out_channels": (3, 32, 80, 240, 720, 1920), + "cfg": _mcfg(w0=80, wa=49.56, wm=2.88, group_size=120, depth=23), + }, + }, + "timm-regnetx_120": { + "encoder": RegNetEncoder, + "pretrained_settings": pretrained_settings["timm-regnetx_120"], + "params": { + "out_channels": (3, 32, 224, 448, 896, 2240), + "cfg": _mcfg(w0=168, wa=73.36, wm=2.37, group_size=112, depth=19), + }, + }, + "timm-regnetx_160": { + "encoder": RegNetEncoder, + "pretrained_settings": pretrained_settings["timm-regnetx_160"], + "params": { + "out_channels": (3, 32, 256, 512, 896, 2048), + "cfg": _mcfg(w0=216, wa=55.59, wm=2.1, group_size=128, depth=22), + }, + }, + "timm-regnetx_320": { + "encoder": RegNetEncoder, + "pretrained_settings": pretrained_settings["timm-regnetx_320"], + "params": { + "out_channels": (3, 32, 336, 672, 1344, 2520), + "cfg": _mcfg(w0=320, wa=69.86, wm=2.0, group_size=168, depth=23), + }, + }, + # regnety + "timm-regnety_002": { + "encoder": RegNetEncoder, + "pretrained_settings": pretrained_settings["timm-regnety_002"], + "params": { + "out_channels": (3, 32, 24, 56, 152, 368), + "cfg": _mcfg( + w0=24, wa=36.44, wm=2.49, group_size=8, depth=13, se_ratio=0.25 + ), + }, + }, + "timm-regnety_004": { + "encoder": RegNetEncoder, + "pretrained_settings": pretrained_settings["timm-regnety_004"], + "params": { + "out_channels": (3, 32, 48, 104, 208, 440), + "cfg": _mcfg( + w0=48, wa=27.89, wm=2.09, group_size=8, depth=16, se_ratio=0.25 + ), + }, + }, + "timm-regnety_006": { + "encoder": RegNetEncoder, + "pretrained_settings": pretrained_settings["timm-regnety_006"], + "params": { + "out_channels": (3, 32, 48, 112, 256, 608), + "cfg": _mcfg( + w0=48, wa=32.54, wm=2.32, group_size=16, depth=15, se_ratio=0.25 + ), + }, + }, + "timm-regnety_008": { + "encoder": RegNetEncoder, + "pretrained_settings": pretrained_settings["timm-regnety_008"], + "params": { + "out_channels": (3, 32, 64, 128, 320, 768), + "cfg": _mcfg( + w0=56, wa=38.84, wm=2.4, group_size=16, depth=14, se_ratio=0.25 + ), + }, + }, + "timm-regnety_016": { + "encoder": RegNetEncoder, + "pretrained_settings": pretrained_settings["timm-regnety_016"], + "params": { + "out_channels": (3, 32, 48, 120, 336, 888), + "cfg": _mcfg( + w0=48, wa=20.71, wm=2.65, group_size=24, depth=27, se_ratio=0.25 + ), + }, + }, + "timm-regnety_032": { + "encoder": RegNetEncoder, + "pretrained_settings": pretrained_settings["timm-regnety_032"], + "params": { + "out_channels": (3, 32, 72, 216, 576, 1512), + "cfg": _mcfg( + w0=80, wa=42.63, wm=2.66, group_size=24, depth=21, se_ratio=0.25 + ), + }, + }, + "timm-regnety_040": { + "encoder": RegNetEncoder, + "pretrained_settings": pretrained_settings["timm-regnety_040"], + "params": { + "out_channels": (3, 32, 128, 192, 512, 1088), + "cfg": _mcfg( + w0=96, wa=31.41, wm=2.24, group_size=64, depth=22, se_ratio=0.25 + ), + }, + }, + "timm-regnety_064": { + "encoder": RegNetEncoder, + "pretrained_settings": pretrained_settings["timm-regnety_064"], + "params": { + "out_channels": (3, 32, 144, 288, 576, 1296), + "cfg": _mcfg( + w0=112, wa=33.22, wm=2.27, group_size=72, depth=25, se_ratio=0.25 + ), + }, + }, + "timm-regnety_080": { + "encoder": RegNetEncoder, + "pretrained_settings": 
pretrained_settings["timm-regnety_080"], + "params": { + "out_channels": (3, 32, 168, 448, 896, 2016), + "cfg": _mcfg( + w0=192, wa=76.82, wm=2.19, group_size=56, depth=17, se_ratio=0.25 + ), + }, + }, + "timm-regnety_120": { + "encoder": RegNetEncoder, + "pretrained_settings": pretrained_settings["timm-regnety_120"], + "params": { + "out_channels": (3, 32, 224, 448, 896, 2240), + "cfg": _mcfg( + w0=168, wa=73.36, wm=2.37, group_size=112, depth=19, se_ratio=0.25 + ), + }, + }, + "timm-regnety_160": { + "encoder": RegNetEncoder, + "pretrained_settings": pretrained_settings["timm-regnety_160"], + "params": { + "out_channels": (3, 32, 224, 448, 1232, 3024), + "cfg": _mcfg( + w0=200, wa=106.23, wm=2.48, group_size=112, depth=18, se_ratio=0.25 + ), + }, + }, + "timm-regnety_320": { + "encoder": RegNetEncoder, + "pretrained_settings": pretrained_settings["timm-regnety_320"], + "params": { + "out_channels": (3, 32, 232, 696, 1392, 3712), + "cfg": _mcfg( + w0=232, wa=115.89, wm=2.53, group_size=232, depth=20, se_ratio=0.25 + ), + }, + }, +} diff --git a/feature_extractor_models/encoders/timm_res2net.py b/feature_extractor_models/encoders/timm_res2net.py new file mode 100644 index 0000000000000000000000000000000000000000..e97043e357e3627de361e2f12e27b22bd3ca0d93 --- /dev/null +++ b/feature_extractor_models/encoders/timm_res2net.py @@ -0,0 +1,163 @@ +from ._base import EncoderMixin +from timm.models.resnet import ResNet +from timm.models.res2net import Bottle2neck +import torch.nn as nn + + +class Res2NetEncoder(ResNet, EncoderMixin): + def __init__(self, out_channels, depth=5, **kwargs): + super().__init__(**kwargs) + self._depth = depth + self._out_channels = out_channels + self._in_channels = 3 + + del self.fc + del self.global_pool + + def get_stages(self): + return [ + nn.Identity(), + nn.Sequential(self.conv1, self.bn1, self.act1), + nn.Sequential(self.maxpool, self.layer1), + self.layer2, + self.layer3, + self.layer4, + ] + + def make_dilated(self, *args, **kwargs): + raise ValueError("Res2Net encoders do not support dilated mode") + + def forward(self, x): + stages = self.get_stages() + + features = [] + for i in range(self._depth + 1): + x = stages[i](x) + features.append(x) + + return features + + def load_state_dict(self, state_dict, **kwargs): + state_dict.pop("fc.bias", None) + state_dict.pop("fc.weight", None) + super().load_state_dict(state_dict, **kwargs) + + +res2net_weights = { + "timm-res2net50_26w_4s": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-res2net/res2net50_26w_4s-06e79181.pth" # noqa + }, + "timm-res2net50_48w_2s": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-res2net/res2net50_48w_2s-afed724a.pth" # noqa + }, + "timm-res2net50_14w_8s": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-res2net/res2net50_14w_8s-6527dddc.pth" # noqa + }, + "timm-res2net50_26w_6s": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-res2net/res2net50_26w_6s-19041792.pth" # noqa + }, + "timm-res2net50_26w_8s": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-res2net/res2net50_26w_8s-2c7c9f12.pth" # noqa + }, + "timm-res2net101_26w_4s": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-res2net/res2net101_26w_4s-02a759a1.pth" # noqa + }, + "timm-res2next50": { + "imagenet": 
"https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-res2net/res2next50_4s-6ef7e7bf.pth" # noqa + }, +} + +pretrained_settings = {} +for model_name, sources in res2net_weights.items(): + pretrained_settings[model_name] = {} + for source_name, source_url in sources.items(): + pretrained_settings[model_name][source_name] = { + "url": source_url, + "input_size": [3, 224, 224], + "input_range": [0, 1], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "num_classes": 1000, + } + + +timm_res2net_encoders = { + "timm-res2net50_26w_4s": { + "encoder": Res2NetEncoder, + "pretrained_settings": pretrained_settings["timm-res2net50_26w_4s"], + "params": { + "out_channels": (3, 64, 256, 512, 1024, 2048), + "block": Bottle2neck, + "layers": [3, 4, 6, 3], + "base_width": 26, + "block_args": {"scale": 4}, + }, + }, + "timm-res2net101_26w_4s": { + "encoder": Res2NetEncoder, + "pretrained_settings": pretrained_settings["timm-res2net101_26w_4s"], + "params": { + "out_channels": (3, 64, 256, 512, 1024, 2048), + "block": Bottle2neck, + "layers": [3, 4, 23, 3], + "base_width": 26, + "block_args": {"scale": 4}, + }, + }, + "timm-res2net50_26w_6s": { + "encoder": Res2NetEncoder, + "pretrained_settings": pretrained_settings["timm-res2net50_26w_6s"], + "params": { + "out_channels": (3, 64, 256, 512, 1024, 2048), + "block": Bottle2neck, + "layers": [3, 4, 6, 3], + "base_width": 26, + "block_args": {"scale": 6}, + }, + }, + "timm-res2net50_26w_8s": { + "encoder": Res2NetEncoder, + "pretrained_settings": pretrained_settings["timm-res2net50_26w_8s"], + "params": { + "out_channels": (3, 64, 256, 512, 1024, 2048), + "block": Bottle2neck, + "layers": [3, 4, 6, 3], + "base_width": 26, + "block_args": {"scale": 8}, + }, + }, + "timm-res2net50_48w_2s": { + "encoder": Res2NetEncoder, + "pretrained_settings": pretrained_settings["timm-res2net50_48w_2s"], + "params": { + "out_channels": (3, 64, 256, 512, 1024, 2048), + "block": Bottle2neck, + "layers": [3, 4, 6, 3], + "base_width": 48, + "block_args": {"scale": 2}, + }, + }, + "timm-res2net50_14w_8s": { + "encoder": Res2NetEncoder, + "pretrained_settings": pretrained_settings["timm-res2net50_14w_8s"], + "params": { + "out_channels": (3, 64, 256, 512, 1024, 2048), + "block": Bottle2neck, + "layers": [3, 4, 6, 3], + "base_width": 14, + "block_args": {"scale": 8}, + }, + }, + "timm-res2next50": { + "encoder": Res2NetEncoder, + "pretrained_settings": pretrained_settings["timm-res2next50"], + "params": { + "out_channels": (3, 64, 256, 512, 1024, 2048), + "block": Bottle2neck, + "layers": [3, 4, 6, 3], + "base_width": 4, + "cardinality": 8, + "block_args": {"scale": 4}, + }, + }, +} diff --git a/feature_extractor_models/encoders/timm_resnest.py b/feature_extractor_models/encoders/timm_resnest.py new file mode 100644 index 0000000000000000000000000000000000000000..1599b6c8ace7a506ac12603c3439d04f5ca3185b --- /dev/null +++ b/feature_extractor_models/encoders/timm_resnest.py @@ -0,0 +1,208 @@ +from ._base import EncoderMixin +from timm.models.resnet import ResNet +from timm.models.resnest import ResNestBottleneck +import torch.nn as nn + + +class ResNestEncoder(ResNet, EncoderMixin): + def __init__(self, out_channels, depth=5, **kwargs): + super().__init__(**kwargs) + self._depth = depth + self._out_channels = out_channels + self._in_channels = 3 + + del self.fc + del self.global_pool + + def get_stages(self): + return [ + nn.Identity(), + nn.Sequential(self.conv1, self.bn1, self.act1), + nn.Sequential(self.maxpool, self.layer1), + self.layer2, + 
self.layer3, + self.layer4, + ] + + def make_dilated(self, *args, **kwargs): + raise ValueError("ResNest encoders do not support dilated mode") + + def forward(self, x): + stages = self.get_stages() + + features = [] + for i in range(self._depth + 1): + x = stages[i](x) + features.append(x) + + return features + + def load_state_dict(self, state_dict, **kwargs): + state_dict.pop("fc.bias", None) + state_dict.pop("fc.weight", None) + super().load_state_dict(state_dict, **kwargs) + + +resnest_weights = { + "timm-resnest14d": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/gluon_resnest14-9c8fe254.pth" # noqa + }, + "timm-resnest26d": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/gluon_resnest26-50eb607c.pth" # noqa + }, + "timm-resnest50d": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-resnest/resnest50-528c19ca.pth" # noqa + }, + "timm-resnest101e": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-resnest/resnest101-22405ba7.pth" # noqa + }, + "timm-resnest200e": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-resnest/resnest200-75117900.pth" # noqa + }, + "timm-resnest269e": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-resnest/resnest269-0cc87c48.pth" # noqa + }, + "timm-resnest50d_4s2x40d": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-resnest/resnest50_fast_4s2x40d-41d14ed0.pth" # noqa + }, + "timm-resnest50d_1s4x24d": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-resnest/resnest50_fast_1s4x24d-d4a4f76f.pth" # noqa + }, +} + +pretrained_settings = {} +for model_name, sources in resnest_weights.items(): + pretrained_settings[model_name] = {} + for source_name, source_url in sources.items(): + pretrained_settings[model_name][source_name] = { + "url": source_url, + "input_size": [3, 224, 224], + "input_range": [0, 1], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "num_classes": 1000, + } + + +timm_resnest_encoders = { + "timm-resnest14d": { + "encoder": ResNestEncoder, + "pretrained_settings": pretrained_settings["timm-resnest14d"], + "params": { + "out_channels": (3, 64, 256, 512, 1024, 2048), + "block": ResNestBottleneck, + "layers": [1, 1, 1, 1], + "stem_type": "deep", + "stem_width": 32, + "avg_down": True, + "base_width": 64, + "cardinality": 1, + "block_args": {"radix": 2, "avd": True, "avd_first": False}, + }, + }, + "timm-resnest26d": { + "encoder": ResNestEncoder, + "pretrained_settings": pretrained_settings["timm-resnest26d"], + "params": { + "out_channels": (3, 64, 256, 512, 1024, 2048), + "block": ResNestBottleneck, + "layers": [2, 2, 2, 2], + "stem_type": "deep", + "stem_width": 32, + "avg_down": True, + "base_width": 64, + "cardinality": 1, + "block_args": {"radix": 2, "avd": True, "avd_first": False}, + }, + }, + "timm-resnest50d": { + "encoder": ResNestEncoder, + "pretrained_settings": pretrained_settings["timm-resnest50d"], + "params": { + "out_channels": (3, 64, 256, 512, 1024, 2048), + "block": ResNestBottleneck, + "layers": [3, 4, 6, 3], + "stem_type": "deep", + "stem_width": 32, + "avg_down": True, + "base_width": 64, + "cardinality": 1, + "block_args": {"radix": 2, "avd": True, "avd_first": False}, + }, + }, + "timm-resnest101e": { + "encoder": ResNestEncoder, + "pretrained_settings": 
pretrained_settings["timm-resnest101e"], + "params": { + "out_channels": (3, 128, 256, 512, 1024, 2048), + "block": ResNestBottleneck, + "layers": [3, 4, 23, 3], + "stem_type": "deep", + "stem_width": 64, + "avg_down": True, + "base_width": 64, + "cardinality": 1, + "block_args": {"radix": 2, "avd": True, "avd_first": False}, + }, + }, + "timm-resnest200e": { + "encoder": ResNestEncoder, + "pretrained_settings": pretrained_settings["timm-resnest200e"], + "params": { + "out_channels": (3, 128, 256, 512, 1024, 2048), + "block": ResNestBottleneck, + "layers": [3, 24, 36, 3], + "stem_type": "deep", + "stem_width": 64, + "avg_down": True, + "base_width": 64, + "cardinality": 1, + "block_args": {"radix": 2, "avd": True, "avd_first": False}, + }, + }, + "timm-resnest269e": { + "encoder": ResNestEncoder, + "pretrained_settings": pretrained_settings["timm-resnest269e"], + "params": { + "out_channels": (3, 128, 256, 512, 1024, 2048), + "block": ResNestBottleneck, + "layers": [3, 30, 48, 8], + "stem_type": "deep", + "stem_width": 64, + "avg_down": True, + "base_width": 64, + "cardinality": 1, + "block_args": {"radix": 2, "avd": True, "avd_first": False}, + }, + }, + "timm-resnest50d_4s2x40d": { + "encoder": ResNestEncoder, + "pretrained_settings": pretrained_settings["timm-resnest50d_4s2x40d"], + "params": { + "out_channels": (3, 64, 256, 512, 1024, 2048), + "block": ResNestBottleneck, + "layers": [3, 4, 6, 3], + "stem_type": "deep", + "stem_width": 32, + "avg_down": True, + "base_width": 40, + "cardinality": 2, + "block_args": {"radix": 4, "avd": True, "avd_first": True}, + }, + }, + "timm-resnest50d_1s4x24d": { + "encoder": ResNestEncoder, + "pretrained_settings": pretrained_settings["timm-resnest50d_1s4x24d"], + "params": { + "out_channels": (3, 64, 256, 512, 1024, 2048), + "block": ResNestBottleneck, + "layers": [3, 4, 6, 3], + "stem_type": "deep", + "stem_width": 32, + "avg_down": True, + "base_width": 24, + "cardinality": 4, + "block_args": {"radix": 1, "avd": True, "avd_first": True}, + }, + }, +} diff --git a/feature_extractor_models/encoders/timm_sknet.py b/feature_extractor_models/encoders/timm_sknet.py new file mode 100644 index 0000000000000000000000000000000000000000..14d6d2b083df50957e4293ec856cfb3ee2b5a4c0 --- /dev/null +++ b/feature_extractor_models/encoders/timm_sknet.py @@ -0,0 +1,103 @@ +from ._base import EncoderMixin +from timm.models.resnet import ResNet +from timm.models.sknet import SelectiveKernelBottleneck, SelectiveKernelBasic +import torch.nn as nn + + +class SkNetEncoder(ResNet, EncoderMixin): + def __init__(self, out_channels, depth=5, **kwargs): + super().__init__(**kwargs) + self._depth = depth + self._out_channels = out_channels + self._in_channels = 3 + + del self.fc + del self.global_pool + + def get_stages(self): + return [ + nn.Identity(), + nn.Sequential(self.conv1, self.bn1, self.act1), + nn.Sequential(self.maxpool, self.layer1), + self.layer2, + self.layer3, + self.layer4, + ] + + def forward(self, x): + stages = self.get_stages() + + features = [] + for i in range(self._depth + 1): + x = stages[i](x) + features.append(x) + + return features + + def load_state_dict(self, state_dict, **kwargs): + state_dict.pop("fc.bias", None) + state_dict.pop("fc.weight", None) + super().load_state_dict(state_dict, **kwargs) + + +sknet_weights = { + "timm-skresnet18": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/skresnet18_ra-4eec2804.pth" # noqa + }, + "timm-skresnet34": { + "imagenet": 
"https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/skresnet34_ra-bdc0ccde.pth" # noqa + }, + "timm-skresnext50_32x4d": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/skresnext50_ra-f40e40bf.pth" # noqa + }, +} + +pretrained_settings = {} +for model_name, sources in sknet_weights.items(): + pretrained_settings[model_name] = {} + for source_name, source_url in sources.items(): + pretrained_settings[model_name][source_name] = { + "url": source_url, + "input_size": [3, 224, 224], + "input_range": [0, 1], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "num_classes": 1000, + } + +timm_sknet_encoders = { + "timm-skresnet18": { + "encoder": SkNetEncoder, + "pretrained_settings": pretrained_settings["timm-skresnet18"], + "params": { + "out_channels": (3, 64, 64, 128, 256, 512), + "block": SelectiveKernelBasic, + "layers": [2, 2, 2, 2], + "zero_init_last": False, + "block_args": {"sk_kwargs": {"rd_ratio": 1 / 8, "split_input": True}}, + }, + }, + "timm-skresnet34": { + "encoder": SkNetEncoder, + "pretrained_settings": pretrained_settings["timm-skresnet34"], + "params": { + "out_channels": (3, 64, 64, 128, 256, 512), + "block": SelectiveKernelBasic, + "layers": [3, 4, 6, 3], + "zero_init_last": False, + "block_args": {"sk_kwargs": {"rd_ratio": 1 / 8, "split_input": True}}, + }, + }, + "timm-skresnext50_32x4d": { + "encoder": SkNetEncoder, + "pretrained_settings": pretrained_settings["timm-skresnext50_32x4d"], + "params": { + "out_channels": (3, 64, 256, 512, 1024, 2048), + "block": SelectiveKernelBottleneck, + "layers": [3, 4, 6, 3], + "zero_init_last": False, + "cardinality": 32, + "base_width": 4, + }, + }, +} diff --git a/feature_extractor_models/encoders/timm_universal.py b/feature_extractor_models/encoders/timm_universal.py new file mode 100644 index 0000000000000000000000000000000000000000..9702a7c34d16e9402b142682ea828b020deb961d --- /dev/null +++ b/feature_extractor_models/encoders/timm_universal.py @@ -0,0 +1,38 @@ +import timm +import torch.nn as nn + + +class TimmUniversalEncoder(nn.Module): + def __init__(self, name, pretrained=True, in_channels=3, depth=5, output_stride=32): + super().__init__() + kwargs = dict( + in_chans=in_channels, + features_only=True, + output_stride=output_stride, + pretrained=pretrained, + out_indices=tuple(range(depth)), + ) + + # not all models support output stride argument, drop it by default + if output_stride == 32: + kwargs.pop("output_stride") + + self.model = timm.create_model(name, **kwargs) + + self._in_channels = in_channels + self._out_channels = [in_channels] + self.model.feature_info.channels() + self._depth = depth + self._output_stride = output_stride + + def forward(self, x): + features = self.model(x) + features = [x] + features + return features + + @property + def out_channels(self): + return self._out_channels + + @property + def output_stride(self): + return min(self._output_stride, 2**self._depth) diff --git a/feature_extractor_models/encoders/vgg.py b/feature_extractor_models/encoders/vgg.py new file mode 100644 index 0000000000000000000000000000000000000000..cbc602c8e4ebbbed362893042e54843a692aabb3 --- /dev/null +++ b/feature_extractor_models/encoders/vgg.py @@ -0,0 +1,159 @@ +"""Each encoder should have following attributes and methods and be inherited from `_base.EncoderMixin` + +Attributes: + + _out_channels (list of int): specify number of channels for each encoder feature tensor + _depth (int): specify number of stages in decoder 
(in other words number of downsampling operations) + _in_channels (int): default number of input channels in first Conv2d layer for encoder (usually 3) + +Methods: + + forward(self, x: torch.Tensor) + produce list of features of different spatial resolutions, each feature is a 4D torch.tensor of + shape NCHW (features should be sorted in descending order according to spatial resolution, starting + with resolution same as input `x` tensor). + + Input: `x` with shape (1, 3, 64, 64) + Output: [f0, f1, f2, f3, f4, f5] - features with corresponding shapes + [(1, 3, 64, 64), (1, 64, 32, 32), (1, 128, 16, 16), (1, 256, 8, 8), + (1, 512, 4, 4), (1, 1024, 2, 2)] (C - dim may differ) + + also should support number of features according to specified depth, e.g. if depth = 5, + number of feature tensors = 6 (one with same resolution as input and 5 downsampled), + depth = 3 -> number of feature tensors = 4 (one with same resolution as input and 3 downsampled). +""" + +import torch.nn as nn +from torchvision.models.vgg import VGG +from torchvision.models.vgg import make_layers +from pretrainedmodels.models.torchvision_models import pretrained_settings + +from ._base import EncoderMixin + +# fmt: off +cfg = { + 'A': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], + 'B': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], + 'D': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'], + 'E': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'], +} +# fmt: on + + +class VGGEncoder(VGG, EncoderMixin): + def __init__(self, out_channels, config, batch_norm=False, depth=5, **kwargs): + super().__init__(make_layers(config, batch_norm=batch_norm), **kwargs) + self._out_channels = out_channels + self._depth = depth + self._in_channels = 3 + del self.classifier + + def make_dilated(self, *args, **kwargs): + raise ValueError( + "'VGG' models do not support dilated mode due to Max Pooling" + " operations for downsampling!" 
+ ) + + def get_stages(self): + stages = [] + stage_modules = [] + for module in self.features: + if isinstance(module, nn.MaxPool2d): + stages.append(nn.Sequential(*stage_modules)) + stage_modules = [] + stage_modules.append(module) + stages.append(nn.Sequential(*stage_modules)) + return stages + + def forward(self, x): + stages = self.get_stages() + + features = [] + for i in range(self._depth + 1): + x = stages[i](x) + features.append(x) + + return features + + def load_state_dict(self, state_dict, **kwargs): + keys = list(state_dict.keys()) + for k in keys: + if k.startswith("classifier"): + state_dict.pop(k, None) + super().load_state_dict(state_dict, **kwargs) + + +vgg_encoders = { + "vgg11": { + "encoder": VGGEncoder, + "pretrained_settings": pretrained_settings["vgg11"], + "params": { + "out_channels": (64, 128, 256, 512, 512, 512), + "config": cfg["A"], + "batch_norm": False, + }, + }, + "vgg11_bn": { + "encoder": VGGEncoder, + "pretrained_settings": pretrained_settings["vgg11_bn"], + "params": { + "out_channels": (64, 128, 256, 512, 512, 512), + "config": cfg["A"], + "batch_norm": True, + }, + }, + "vgg13": { + "encoder": VGGEncoder, + "pretrained_settings": pretrained_settings["vgg13"], + "params": { + "out_channels": (64, 128, 256, 512, 512, 512), + "config": cfg["B"], + "batch_norm": False, + }, + }, + "vgg13_bn": { + "encoder": VGGEncoder, + "pretrained_settings": pretrained_settings["vgg13_bn"], + "params": { + "out_channels": (64, 128, 256, 512, 512, 512), + "config": cfg["B"], + "batch_norm": True, + }, + }, + "vgg16": { + "encoder": VGGEncoder, + "pretrained_settings": pretrained_settings["vgg16"], + "params": { + "out_channels": (64, 128, 256, 512, 512, 512), + "config": cfg["D"], + "batch_norm": False, + }, + }, + "vgg16_bn": { + "encoder": VGGEncoder, + "pretrained_settings": pretrained_settings["vgg16_bn"], + "params": { + "out_channels": (64, 128, 256, 512, 512, 512), + "config": cfg["D"], + "batch_norm": True, + }, + }, + "vgg19": { + "encoder": VGGEncoder, + "pretrained_settings": pretrained_settings["vgg19"], + "params": { + "out_channels": (64, 128, 256, 512, 512, 512), + "config": cfg["E"], + "batch_norm": False, + }, + }, + "vgg19_bn": { + "encoder": VGGEncoder, + "pretrained_settings": pretrained_settings["vgg19_bn"], + "params": { + "out_channels": (64, 128, 256, 512, 512, 512), + "config": cfg["E"], + "batch_norm": True, + }, + }, +} diff --git a/feature_extractor_models/encoders/xception.py b/feature_extractor_models/encoders/xception.py new file mode 100644 index 0000000000000000000000000000000000000000..c8c476ced33c36aa271fd0c362c635b240ff6c32 --- /dev/null +++ b/feature_extractor_models/encoders/xception.py @@ -0,0 +1,77 @@ +import torch.nn as nn + +from pretrainedmodels.models.xception import pretrained_settings +from pretrainedmodels.models.xception import Xception + +from ._base import EncoderMixin + + +class XceptionEncoder(Xception, EncoderMixin): + def __init__(self, out_channels, *args, depth=5, **kwargs): + super().__init__(*args, **kwargs) + + self._out_channels = out_channels + self._depth = depth + self._in_channels = 3 + + # modify padding to maintain output shape + self.conv1.padding = (1, 1) + self.conv2.padding = (1, 1) + + del self.fc + + def make_dilated(self, *args, **kwargs): + raise ValueError( + "Xception encoder does not support dilated mode " + "due to pooling operation for downsampling!" 
+ ) + + def get_stages(self): + return [ + nn.Identity(), + nn.Sequential( + self.conv1, self.bn1, self.relu, self.conv2, self.bn2, self.relu + ), + self.block1, + self.block2, + nn.Sequential( + self.block3, + self.block4, + self.block5, + self.block6, + self.block7, + self.block8, + self.block9, + self.block10, + self.block11, + ), + nn.Sequential( + self.block12, self.conv3, self.bn3, self.relu, self.conv4, self.bn4 + ), + ] + + def forward(self, x): + stages = self.get_stages() + + features = [] + for i in range(self._depth + 1): + x = stages[i](x) + features.append(x) + + return features + + def load_state_dict(self, state_dict): + # remove linear + state_dict.pop("fc.bias", None) + state_dict.pop("fc.weight", None) + + super().load_state_dict(state_dict) + + +xception_encoders = { + "xception": { + "encoder": XceptionEncoder, + "pretrained_settings": pretrained_settings["xception"], + "params": {"out_channels": (3, 64, 128, 256, 728, 2048)}, + } +} diff --git a/feature_extractor_models/utils/__init__.py b/feature_extractor_models/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4aeb8fa5d076e55c07f9dcabd4650e59f96b2db2 --- /dev/null +++ b/feature_extractor_models/utils/__init__.py @@ -0,0 +1,12 @@ +import warnings + +from . import train +from . import losses +from . import metrics + +__all__ = ["train", "losses", "metrics"] + +warnings.warn( + "`smp.utils` module is deprecated and will be removed in future releases.", + DeprecationWarning, +) diff --git a/feature_extractor_models/utils/base.py b/feature_extractor_models/utils/base.py new file mode 100644 index 0000000000000000000000000000000000000000..60def85de5ee5bf1575455fa5db75e0178e6369a --- /dev/null +++ b/feature_extractor_models/utils/base.py @@ -0,0 +1,67 @@ +import re +import torch.nn as nn + + +class BaseObject(nn.Module): + def __init__(self, name=None): + super().__init__() + self._name = name + + @property + def __name__(self): + if self._name is None: + name = self.__class__.__name__ + s1 = re.sub("(.)([A-Z][a-z]+)", r"\1_\2", name) + return re.sub("([a-z0-9])([A-Z])", r"\1_\2", s1).lower() + else: + return self._name + + +class Metric(BaseObject): + pass + + +class Loss(BaseObject): + def __add__(self, other): + if isinstance(other, Loss): + return SumOfLosses(self, other) + else: + raise ValueError("Loss should be inherited from `Loss` class") + + def __radd__(self, other): + return self.__add__(other) + + def __mul__(self, value): + if isinstance(value, (int, float)): + return MultipliedLoss(self, value) + else: + raise ValueError("Loss should be inherited from `BaseLoss` class") + + def __rmul__(self, other): + return self.__mul__(other) + + +class SumOfLosses(Loss): + def __init__(self, l1, l2): + name = "{} + {}".format(l1.__name__, l2.__name__) + super().__init__(name=name) + self.l1 = l1 + self.l2 = l2 + + def __call__(self, *inputs): + return self.l1.forward(*inputs) + self.l2.forward(*inputs) + + +class MultipliedLoss(Loss): + def __init__(self, loss, multiplier): + # resolve name + if len(loss.__name__.split("+")) > 1: + name = "{} * ({})".format(multiplier, loss.__name__) + else: + name = "{} * {}".format(multiplier, loss.__name__) + super().__init__(name=name) + self.loss = loss + self.multiplier = multiplier + + def __call__(self, *inputs): + return self.multiplier * self.loss.forward(*inputs) diff --git a/feature_extractor_models/utils/functional.py b/feature_extractor_models/utils/functional.py new file mode 100644 index 
0000000000000000000000000000000000000000..261a1ee8fdd4ad55bc0de42e520b0a28a66b624e --- /dev/null +++ b/feature_extractor_models/utils/functional.py @@ -0,0 +1,132 @@ +import torch + + +def _take_channels(*xs, ignore_channels=None): + if ignore_channels is None: + return xs + else: + channels = [ + channel + for channel in range(xs[0].shape[1]) + if channel not in ignore_channels + ] + xs = [ + torch.index_select(x, dim=1, index=torch.tensor(channels).to(x.device)) + for x in xs + ] + return xs + + +def _threshold(x, threshold=None): + if threshold is not None: + return (x > threshold).type(x.dtype) + else: + return x + + +def iou(pr, gt, eps=1e-7, threshold=None, ignore_channels=None): + """Calculate Intersection over Union between ground truth and prediction + Args: + pr (torch.Tensor): predicted tensor + gt (torch.Tensor): ground truth tensor + eps (float): epsilon to avoid zero division + threshold: threshold for outputs binarization + Returns: + float: IoU (Jaccard) score + """ + + pr = _threshold(pr, threshold=threshold) + pr, gt = _take_channels(pr, gt, ignore_channels=ignore_channels) + + intersection = torch.sum(gt * pr) + union = torch.sum(gt) + torch.sum(pr) - intersection + eps + return (intersection + eps) / union + + +jaccard = iou + + +def f_score(pr, gt, beta=1, eps=1e-7, threshold=None, ignore_channels=None): + """Calculate F-score between ground truth and prediction + Args: + pr (torch.Tensor): predicted tensor + gt (torch.Tensor): ground truth tensor + beta (float): positive constant + eps (float): epsilon to avoid zero division + threshold: threshold for outputs binarization + Returns: + float: F score + """ + + pr = _threshold(pr, threshold=threshold) + pr, gt = _take_channels(pr, gt, ignore_channels=ignore_channels) + + tp = torch.sum(gt * pr) + fp = torch.sum(pr) - tp + fn = torch.sum(gt) - tp + + score = ((1 + beta**2) * tp + eps) / ((1 + beta**2) * tp + beta**2 * fn + fp + eps) + + return score + + +def accuracy(pr, gt, threshold=0.5, ignore_channels=None): + """Calculate accuracy score between ground truth and prediction + Args: + pr (torch.Tensor): predicted tensor + gt (torch.Tensor): ground truth tensor + eps (float): epsilon to avoid zero division + threshold: threshold for outputs binarization + Returns: + float: precision score + """ + pr = _threshold(pr, threshold=threshold) + pr, gt = _take_channels(pr, gt, ignore_channels=ignore_channels) + + tp = torch.sum(gt == pr, dtype=pr.dtype) + score = tp / gt.view(-1).shape[0] + return score + + +def precision(pr, gt, eps=1e-7, threshold=None, ignore_channels=None): + """Calculate precision score between ground truth and prediction + Args: + pr (torch.Tensor): predicted tensor + gt (torch.Tensor): ground truth tensor + eps (float): epsilon to avoid zero division + threshold: threshold for outputs binarization + Returns: + float: precision score + """ + + pr = _threshold(pr, threshold=threshold) + pr, gt = _take_channels(pr, gt, ignore_channels=ignore_channels) + + tp = torch.sum(gt * pr) + fp = torch.sum(pr) - tp + + score = (tp + eps) / (tp + fp + eps) + + return score + + +def recall(pr, gt, eps=1e-7, threshold=None, ignore_channels=None): + """Calculate Recall between ground truth and prediction + Args: + pr (torch.Tensor): A list of predicted elements + gt (torch.Tensor): A list of elements that are to be predicted + eps (float): epsilon to avoid zero division + threshold: threshold for outputs binarization + Returns: + float: recall score + """ + + pr = _threshold(pr, threshold=threshold) + pr, gt = 
_take_channels(pr, gt, ignore_channels=ignore_channels) + + tp = torch.sum(gt * pr) + fn = torch.sum(gt) - tp + + score = (tp + eps) / (tp + fn + eps) + + return score diff --git a/feature_extractor_models/utils/losses.py b/feature_extractor_models/utils/losses.py new file mode 100644 index 0000000000000000000000000000000000000000..d7a87c8eb972696db63f08b903a1b84168e1aea6 --- /dev/null +++ b/feature_extractor_models/utils/losses.py @@ -0,0 +1,69 @@ +import torch.nn as nn + +from . import base +from . import functional as F +from ..base.modules import Activation + + +class JaccardLoss(base.Loss): + def __init__(self, eps=1.0, activation=None, ignore_channels=None, **kwargs): + super().__init__(**kwargs) + self.eps = eps + self.activation = Activation(activation) + self.ignore_channels = ignore_channels + + def forward(self, y_pr, y_gt): + y_pr = self.activation(y_pr) + return 1 - F.jaccard( + y_pr, + y_gt, + eps=self.eps, + threshold=None, + ignore_channels=self.ignore_channels, + ) + + +class DiceLoss(base.Loss): + def __init__( + self, eps=1.0, beta=1.0, activation=None, ignore_channels=None, **kwargs + ): + super().__init__(**kwargs) + self.eps = eps + self.beta = beta + self.activation = Activation(activation) + self.ignore_channels = ignore_channels + + def forward(self, y_pr, y_gt): + y_pr = self.activation(y_pr) + return 1 - F.f_score( + y_pr, + y_gt, + beta=self.beta, + eps=self.eps, + threshold=None, + ignore_channels=self.ignore_channels, + ) + + +class L1Loss(nn.L1Loss, base.Loss): + pass + + +class MSELoss(nn.MSELoss, base.Loss): + pass + + +class CrossEntropyLoss(nn.CrossEntropyLoss, base.Loss): + pass + + +class NLLLoss(nn.NLLLoss, base.Loss): + pass + + +class BCELoss(nn.BCELoss, base.Loss): + pass + + +class BCEWithLogitsLoss(nn.BCEWithLogitsLoss, base.Loss): + pass diff --git a/feature_extractor_models/utils/meter.py b/feature_extractor_models/utils/meter.py new file mode 100644 index 0000000000000000000000000000000000000000..9faa720cef0368474b8238d6a530cb2bfd5164f3 --- /dev/null +++ b/feature_extractor_models/utils/meter.py @@ -0,0 +1,61 @@ +import numpy as np + + +class Meter(object): + """Meters provide a way to keep track of important statistics in an online manner. + This class is abstract, but provides a standard interface for all meters to follow. + """ + + def reset(self): + """Reset the meter to default settings.""" + pass + + def add(self, value): + """Log a new value to the meter + Args: + value: Next result to include. 
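The metric helpers in `functional.py` all reduce to sums of true positives, false positives and false negatives over (optionally thresholded) prediction and ground-truth tensors. A minimal, self-contained sketch of the same reductions on a made-up 4x4 mask (only `torch` is assumed; the values and the 0.5 threshold are purely illustrative):

```python
import torch

# Made-up 1x1x4x4 prediction scores and binary ground truth (top-left 2x2 is foreground).
pr = torch.tensor([[[[0.9, 0.8, 0.2, 0.1],
                     [0.7, 0.6, 0.3, 0.2],
                     [0.1, 0.2, 0.6, 0.3],
                     [0.0, 0.1, 0.2, 0.1]]]])
gt = torch.zeros_like(pr)
gt[..., :2, :2] = 1.0

# Same binarization step as `_threshold(pr, threshold=0.5)`.
pr_bin = (pr > 0.5).type(pr.dtype)

eps = 1e-7
tp = torch.sum(gt * pr_bin)      # true positives  -> 4
fp = torch.sum(pr_bin) - tp      # false positives -> 1
fn = torch.sum(gt) - tp          # false negatives -> 0

beta = 1.0
iou = (tp + eps) / (tp + fp + fn + eps)                                           # 0.8
f1 = ((1 + beta**2) * tp + eps) / ((1 + beta**2) * tp + beta**2 * fn + fp + eps)  # ~0.889
precision = (tp + eps) / (tp + fp + eps)                                          # 0.8
recall = (tp + eps) / (tp + fn + eps)                                             # 1.0
print(iou.item(), f1.item(), precision.item(), recall.item())
```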
+ """ + pass + + def value(self): + """Get the value of the meter in the current state.""" + pass + + +class AverageValueMeter(Meter): + def __init__(self): + super(AverageValueMeter, self).__init__() + self.reset() + self.val = 0 + + def add(self, value, n=1): + self.val = value + self.sum += value + self.var += value * value + self.n += n + + if self.n == 0: + self.mean, self.std = np.nan, np.nan + elif self.n == 1: + self.mean = 0.0 + self.sum # This is to force a copy in torch/numpy + self.std = np.inf + self.mean_old = self.mean + self.m_s = 0.0 + else: + self.mean = self.mean_old + (value - n * self.mean_old) / float(self.n) + self.m_s += (value - self.mean_old) * (value - self.mean) + self.mean_old = self.mean + self.std = np.sqrt(self.m_s / (self.n - 1.0)) + + def value(self): + return self.mean, self.std + + def reset(self): + self.n = 0 + self.sum = 0.0 + self.var = 0.0 + self.val = 0.0 + self.mean = np.nan + self.mean_old = 0.0 + self.m_s = 0.0 + self.std = np.nan diff --git a/feature_extractor_models/utils/metrics.py b/feature_extractor_models/utils/metrics.py new file mode 100644 index 0000000000000000000000000000000000000000..65df21708cec34d076d9df6bb6bd4755fd3bbccb --- /dev/null +++ b/feature_extractor_models/utils/metrics.py @@ -0,0 +1,111 @@ +from . import base +from . import functional as F +from ..base.modules import Activation + + +class IoU(base.Metric): + __name__ = "iou_score" + + def __init__( + self, eps=1e-7, threshold=0.5, activation=None, ignore_channels=None, **kwargs + ): + super().__init__(**kwargs) + self.eps = eps + self.threshold = threshold + self.activation = Activation(activation) + self.ignore_channels = ignore_channels + + def forward(self, y_pr, y_gt): + y_pr = self.activation(y_pr) + return F.iou( + y_pr, + y_gt, + eps=self.eps, + threshold=self.threshold, + ignore_channels=self.ignore_channels, + ) + + +class Fscore(base.Metric): + def __init__( + self, + beta=1, + eps=1e-7, + threshold=0.5, + activation=None, + ignore_channels=None, + **kwargs, + ): + super().__init__(**kwargs) + self.eps = eps + self.beta = beta + self.threshold = threshold + self.activation = Activation(activation) + self.ignore_channels = ignore_channels + + def forward(self, y_pr, y_gt): + y_pr = self.activation(y_pr) + return F.f_score( + y_pr, + y_gt, + eps=self.eps, + beta=self.beta, + threshold=self.threshold, + ignore_channels=self.ignore_channels, + ) + + +class Accuracy(base.Metric): + def __init__(self, threshold=0.5, activation=None, ignore_channels=None, **kwargs): + super().__init__(**kwargs) + self.threshold = threshold + self.activation = Activation(activation) + self.ignore_channels = ignore_channels + + def forward(self, y_pr, y_gt): + y_pr = self.activation(y_pr) + return F.accuracy( + y_pr, y_gt, threshold=self.threshold, ignore_channels=self.ignore_channels + ) + + +class Recall(base.Metric): + def __init__( + self, eps=1e-7, threshold=0.5, activation=None, ignore_channels=None, **kwargs + ): + super().__init__(**kwargs) + self.eps = eps + self.threshold = threshold + self.activation = Activation(activation) + self.ignore_channels = ignore_channels + + def forward(self, y_pr, y_gt): + y_pr = self.activation(y_pr) + return F.recall( + y_pr, + y_gt, + eps=self.eps, + threshold=self.threshold, + ignore_channels=self.ignore_channels, + ) + + +class Precision(base.Metric): + def __init__( + self, eps=1e-7, threshold=0.5, activation=None, ignore_channels=None, **kwargs + ): + super().__init__(**kwargs) + self.eps = eps + self.threshold = threshold + self.activation 
= Activation(activation) + self.ignore_channels = ignore_channels + + def forward(self, y_pr, y_gt): + y_pr = self.activation(y_pr) + return F.precision( + y_pr, + y_gt, + eps=self.eps, + threshold=self.threshold, + ignore_channels=self.ignore_channels, + ) diff --git a/feature_extractor_models/utils/train.py b/feature_extractor_models/utils/train.py new file mode 100644 index 0000000000000000000000000000000000000000..8c087c6b0691ab665021e613f700ad209e86b076 --- /dev/null +++ b/feature_extractor_models/utils/train.py @@ -0,0 +1,116 @@ +import sys +import torch +from tqdm import tqdm as tqdm +from .meter import AverageValueMeter + + +class Epoch: + def __init__(self, model, loss, metrics, stage_name, device="cpu", verbose=True): + self.model = model + self.loss = loss + self.metrics = metrics + self.stage_name = stage_name + self.verbose = verbose + self.device = device + + self._to_device() + + def _to_device(self): + self.model.to(self.device) + self.loss.to(self.device) + for metric in self.metrics: + metric.to(self.device) + + def _format_logs(self, logs): + str_logs = ["{} - {:.4}".format(k, v) for k, v in logs.items()] + s = ", ".join(str_logs) + return s + + def batch_update(self, x, y): + raise NotImplementedError + + def on_epoch_start(self): + pass + + def run(self, dataloader): + self.on_epoch_start() + + logs = {} + loss_meter = AverageValueMeter() + metrics_meters = { + metric.__name__: AverageValueMeter() for metric in self.metrics + } + + with tqdm( + dataloader, + desc=self.stage_name, + file=sys.stdout, + disable=not (self.verbose), + ) as iterator: + for x, y in iterator: + x, y = x.to(self.device), y.to(self.device) + loss, y_pred = self.batch_update(x, y) + + # update loss logs + loss_value = loss.cpu().detach().numpy() + loss_meter.add(loss_value) + loss_logs = {self.loss.__name__: loss_meter.mean} + logs.update(loss_logs) + + # update metrics logs + for metric_fn in self.metrics: + metric_value = metric_fn(y_pred, y).cpu().detach().numpy() + metrics_meters[metric_fn.__name__].add(metric_value) + metrics_logs = {k: v.mean for k, v in metrics_meters.items()} + logs.update(metrics_logs) + + if self.verbose: + s = self._format_logs(logs) + iterator.set_postfix_str(s) + + return logs + + +class TrainEpoch(Epoch): + def __init__(self, model, loss, metrics, optimizer, device="cpu", verbose=True): + super().__init__( + model=model, + loss=loss, + metrics=metrics, + stage_name="train", + device=device, + verbose=verbose, + ) + self.optimizer = optimizer + + def on_epoch_start(self): + self.model.train() + + def batch_update(self, x, y): + self.optimizer.zero_grad() + prediction = self.model.forward(x) + loss = self.loss(prediction, y) + loss.backward() + self.optimizer.step() + return loss, prediction + + +class ValidEpoch(Epoch): + def __init__(self, model, loss, metrics, device="cpu", verbose=True): + super().__init__( + model=model, + loss=loss, + metrics=metrics, + stage_name="valid", + device=device, + verbose=verbose, + ) + + def on_epoch_start(self): + self.model.eval() + + def batch_update(self, x, y): + with torch.no_grad(): + prediction = self.model.forward(x) + loss = self.loss(prediction, y) + return loss, prediction diff --git a/logger.py b/logger.py new file mode 100644 index 0000000000000000000000000000000000000000..56b4d718091ad22d84d59387ca76628aa242555e --- /dev/null +++ b/logger.py @@ -0,0 +1,28 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. 
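`TrainEpoch` and `ValidEpoch` only need a model, a `Loss`, a list of `Metric`s and a dataloader yielding `(x, y)` pairs. A rough usage sketch, assuming the package keeps the usual segmentation-models layout (so `FPN`, `DiceLoss` and `IoU` are importable as below, and "resnet18" is an available encoder name) and using toy tensors in place of a real dataset:

```python
import torch
from torch.utils.data import DataLoader, TensorDataset

import feature_extractor_models as smp
from feature_extractor_models.utils import losses, metrics, train

# Toy binary-segmentation data standing in for a real dataset.
images = torch.rand(8, 3, 64, 64)
masks = (torch.rand(8, 1, 64, 64) > 0.5).float()
loader = DataLoader(TensorDataset(images, masks), batch_size=4)

# encoder_weights=None keeps the sketch download-free.
model = smp.FPN(encoder_name="resnet18", encoder_weights=None,
                classes=1, activation="sigmoid")
loss = losses.DiceLoss()
metric_fns = [metrics.IoU(threshold=0.5)]
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

train_epoch = train.TrainEpoch(model, loss=loss, metrics=metric_fns,
                               optimizer=optimizer, device="cpu", verbose=True)
valid_epoch = train.ValidEpoch(model, loss=loss, metrics=metric_fns,
                               device="cpu", verbose=True)

for epoch in range(2):
    train_logs = train_epoch.run(loader)   # e.g. {"dice_loss": ..., "iou_score": ...}
    valid_logs = valid_epoch.run(loader)
    print(epoch, train_logs, valid_logs)
```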
+ +from pathlib import Path +import logging + +import pytorch_lightning # noqa: F401 + + +formatter = logging.Formatter( + fmt="[%(asctime)s %(name)s %(levelname)s] %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", +) +handler = logging.StreamHandler() +handler.setFormatter(formatter) +handler.setLevel(logging.INFO) + +logger = logging.getLogger("maploc") +logger.setLevel(logging.INFO) +logger.addHandler(handler) +logger.propagate = False + +pl_logger = logging.getLogger("pytorch_lightning") +if len(pl_logger.handlers): + pl_logger.handlers[0].setFormatter(formatter) + +repo_dir = Path(__file__).parent +EXPERIMENTS_PATH = repo_dir / "experiments/" +DATASETS_PATH = repo_dir / "datasets/" diff --git a/models/__init__.py b/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..6de8926cef52331393188c986e483179d50472bb --- /dev/null +++ b/models/__init__.py @@ -0,0 +1,33 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. + +# Adapted from PixLoc, Paul-Edouard Sarlin, ETH Zurich +# https://github.com/cvg/pixloc +# Released under the Apache License 2.0 + +import inspect + +from .base import BaseModel + +def get_class(mod_name, base_path, BaseClass): + """Get the class object which inherits from BaseClass and is defined in + the module named mod_name, child of base_path. + """ + mod_path = "{}.{}".format(base_path, mod_name) + mod = __import__(mod_path, fromlist=[""]) + classes = inspect.getmembers(mod, inspect.isclass) + # Filter classes defined in the module + classes = [c for c in classes if c[1].__module__ == mod_path] + # Filter classes inherited from BaseModel + classes = [c for c in classes if issubclass(c[1], BaseClass)] + assert len(classes) == 1, classes + return classes[0][1] + + +def get_model(name): + if name == "localizer": + name = "localizer_basic" + elif name == "rotation_localizer": + name = "localizer_basic_rotation" + elif name == "bev_localizer": + name = "localizer_bev_plane" + return get_class(name, __name__, BaseModel) diff --git a/models/base.py b/models/base.py new file mode 100644 index 0000000000000000000000000000000000000000..a9978eabb3c32fcd98f12399347f4c864e463494 --- /dev/null +++ b/models/base.py @@ -0,0 +1,123 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. + +# Adapted from PixLoc, Paul-Edouard Sarlin, ETH Zurich +# https://github.com/cvg/pixloc +# Released under the Apache License 2.0 + +""" +Base class for trainable models. +""" + +from abc import ABCMeta, abstractmethod +from copy import copy + +import omegaconf +from omegaconf import OmegaConf +from torch import nn + + +class BaseModel(nn.Module, metaclass=ABCMeta): + """ + What the child model is expect to declare: + default_conf: dictionary of the default configuration of the model. + It recursively updates the default_conf of all parent classes, and + it is updated by the user-provided configuration passed to __init__. + Configurations can be nested. + + required_data_keys: list of expected keys in the input data dictionary. + + strict_conf (optional): boolean. If false, BaseModel does not raise + an error when the user provides an unknown configuration entry. + + _init(self, conf): initialization method, where conf is the final + configuration object (also accessible with `self.conf`). Accessing + unknown configuration entries will raise an error. + + _forward(self, data): method that returns a dictionary of batched + prediction tensors based on a dictionary of batched input data tensors. 
+ + loss(self, pred, data): method that returns a dictionary of losses, + computed from model predictions and input data. Each loss is a batch + of scalars, i.e. a torch.Tensor of shape (B,). + The total loss to be optimized has the key `'total'`. + + metrics(self, pred, data): method that returns a dictionary of metrics, + each as a batch of scalars. + """ + + base_default_conf = { + "name": None, + "trainable": True, # if false: do not optimize this model parameters + "freeze_batch_normalization": False, # use test-time statistics + } + default_conf = {} + required_data_keys = [] + strict_conf = True + + def __init__(self, conf): + """Perform some logic and call the _init method of the child model.""" + super().__init__() + default_conf = OmegaConf.merge( + self.base_default_conf, OmegaConf.create(self.default_conf) + ) + if self.strict_conf: + OmegaConf.set_struct(default_conf, True) + + # fixme: backward compatibility + if "pad" in conf and "pad" not in default_conf: # backward compat. + with omegaconf.read_write(conf): + with omegaconf.open_dict(conf): + conf["interpolation"] = {"pad": conf.pop("pad")} + + if isinstance(conf, dict): + conf = OmegaConf.create(conf) + self.conf = conf = OmegaConf.merge(default_conf, conf) + OmegaConf.set_readonly(conf, True) + OmegaConf.set_struct(conf, True) + self.required_data_keys = copy(self.required_data_keys) + self._init(conf) + + if not conf.trainable: + for p in self.parameters(): + p.requires_grad = False + + def train(self, mode=True): + super().train(mode) + + def freeze_bn(module): + if isinstance(module, nn.modules.batchnorm._BatchNorm): + module.eval() + + if self.conf.freeze_batch_normalization: + self.apply(freeze_bn) + + return self + + def forward(self, data): + """Check the data and call the _forward method of the child model.""" + + def recursive_key_check(expected, given): + for key in expected: + assert key in given, f"Missing key {key} in data" + if isinstance(expected, dict): + recursive_key_check(expected[key], given[key]) + + recursive_key_check(self.required_data_keys, data) + return self._forward(data) + + @abstractmethod + def _init(self, conf): + """To be implemented by the child class.""" + raise NotImplementedError + + @abstractmethod + def _forward(self, data): + """To be implemented by the child class.""" + raise NotImplementedError + + def loss(self, pred, data): + """To be implemented by the child class.""" + raise NotImplementedError + + def metrics(self): + return {} # no metrics diff --git a/models/bev_net.py b/models/bev_net.py new file mode 100644 index 0000000000000000000000000000000000000000..35056ca9a1d08f64262f7921fa2dcb4278d73530 --- /dev/null +++ b/models/bev_net.py @@ -0,0 +1,61 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. 
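The configuration and forward contract of `BaseModel` is easiest to see on a toy child class. A minimal sketch, assuming the repository root is on `PYTHONPATH`; `TinyRegressor` and its dimensions are made up for illustration:

```python
import torch
import torch.nn as nn

from models.base import BaseModel  # assumes the repository root is on PYTHONPATH


class TinyRegressor(BaseModel):
    # Merged over BaseModel.base_default_conf, then overridden by the user conf.
    default_conf = {"in_dim": 4, "hidden_dim": 16, "out_dim": 2}
    required_data_keys = ["x"]

    def _init(self, conf):
        self.net = nn.Sequential(
            nn.Linear(conf.in_dim, conf.hidden_dim),
            nn.ReLU(),
            nn.Linear(conf.hidden_dim, conf.out_dim),
        )

    def _forward(self, data):
        return {"y": self.net(data["x"])}

    def loss(self, pred, data):
        # One scalar per batch element; the optimized term goes under "total".
        return {"total": (pred["y"] - data["y_gt"]).pow(2).mean(-1)}


model = TinyRegressor({"hidden_dim": 32})      # user conf overrides the default
data = {"x": torch.randn(8, 4), "y_gt": torch.randn(8, 2)}
pred = model(data)                             # forward() first checks required_data_keys
print(model.conf.hidden_dim, pred["y"].shape, model.loss(pred, data)["total"].shape)
```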
+ +import torch.nn as nn +from torchvision.models.resnet import Bottleneck + +from .base import BaseModel +from .feature_extractor import AdaptationBlock +from .utils import checkpointed + + +class BEVNet(BaseModel): + default_conf = { + "pretrained": True, + "num_blocks": "???", + "latent_dim": "???", + "input_dim": "${.latent_dim}", + "output_dim": "${.latent_dim}", + "confidence": False, + "norm_layer": "nn.BatchNorm2d", # normalization ind decoder blocks + "checkpointed": False, # whether to use gradient checkpointing + "padding": "zeros", + } + + def _init(self, conf): + blocks = [] + Block = checkpointed(Bottleneck, do=conf.checkpointed) + for i in range(conf.num_blocks): + dim = conf.input_dim if i == 0 else conf.latent_dim + blocks.append( + Block( + dim, + conf.latent_dim // Bottleneck.expansion, + norm_layer=eval(conf.norm_layer), + ) + ) + self.blocks = nn.Sequential(*blocks) + self.output_layer = AdaptationBlock(conf.latent_dim, conf.output_dim) + if conf.confidence: + self.confidence_layer = AdaptationBlock(conf.latent_dim, 1) + + def update_padding(module): + if isinstance(module, nn.Conv2d): + module.padding_mode = conf.padding + + if conf.padding != "zeros": + self.bocks.apply(update_padding) + + def _forward(self, data): + features = self.blocks(data["input"]) + pred = { + "output": self.output_layer(features), + } + if self.conf.confidence: + pred["confidence"] = self.confidence_layer(features).squeeze(1).sigmoid() + return pred + + def loss(self, pred, data): + raise NotImplementedError + + def metrics(self, pred, data): + raise NotImplementedError diff --git a/models/bev_projection.py b/models/bev_projection.py new file mode 100644 index 0000000000000000000000000000000000000000..af49c115d4bf4a2aa0d71f1a111672908bbd839a --- /dev/null +++ b/models/bev_projection.py @@ -0,0 +1,91 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. 
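`BEVNet` is a stack of `num_blocks` torchvision `Bottleneck` residual blocks at `latent_dim` channels, followed by a 1x1 output head and an optional confidence head. A quick shape-check sketch with arbitrary dimensions, assuming `models.bev_net` is importable; the "???" entries of `default_conf` must be filled in by the caller:

```python
import torch
from models.bev_net import BEVNet  # assumes the repository root is on PYTHONPATH

net = BEVNet({
    "num_blocks": 2,       # required ("???" in default_conf)
    "latent_dim": 128,     # input_dim defaults to ${.latent_dim}
    "output_dim": 8,
    "confidence": True,
})

bev = torch.randn(1, 128, 64, 32)      # (B, latent_dim, H, W) BEV feature plane
pred = net({"input": bev})
print(pred["output"].shape)            # torch.Size([1, 8, 64, 32])
print(pred["confidence"].shape)        # torch.Size([1, 64, 32]), values in (0, 1)
```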
+ +import torch +from torch.nn.functional import grid_sample + +from utils.geometry import from_homogeneous +from models.utils import make_grid + + +class PolarProjectionDepth(torch.nn.Module): + def __init__(self, z_max, ppm, scale_range, z_min=None): + super().__init__() + self.z_max = z_max + self.Δ = Δ = 1 / ppm + self.z_min = z_min = Δ if z_min is None else z_min + self.scale_range = scale_range + z_steps = torch.arange(z_min, z_max + Δ, Δ) + self.register_buffer("depth_steps", z_steps, persistent=False) + + def sample_depth_scores(self, pixel_scales, camera): + scale_steps = camera.f[..., None, 1] / self.depth_steps.flip(-1) + log_scale_steps = torch.log2(scale_steps) + scale_min, scale_max = self.scale_range + log_scale_norm = (log_scale_steps - scale_min) / (scale_max - scale_min) + log_scale_norm = log_scale_norm * 2 - 1 # in [-1, 1] + + values = pixel_scales.flatten(1, 2).unsqueeze(-1) + indices = log_scale_norm.unsqueeze(-1) + indices = torch.stack([torch.zeros_like(indices), indices], -1) + depth_scores = grid_sample(values, indices, align_corners=True) + depth_scores = depth_scores.reshape( + pixel_scales.shape[:-1] + (len(self.depth_steps),) + ) + return depth_scores + + def forward( + self, + image, + pixel_scales, + camera, + return_total_score=False, + ): + depth_scores = self.sample_depth_scores(pixel_scales, camera) + depth_prob = torch.softmax(depth_scores, dim=1) + image_polar = torch.einsum("...dhw,...hwz->...dzw", image, depth_prob) + if return_total_score: + cell_score = torch.logsumexp(depth_scores, dim=1, keepdim=True) + return image_polar, cell_score.squeeze(1) + return image_polar + + +class CartesianProjection(torch.nn.Module): + def __init__(self, z_max, x_max, ppm, z_min=None): + super().__init__() + self.z_max = z_max + self.x_max = x_max + self.Δ = Δ = 1 / ppm + self.z_min = z_min = Δ if z_min is None else z_min + + grid_xz = make_grid( + x_max * 2 + Δ, z_max, step_y=Δ, step_x=Δ, orig_y=Δ, orig_x=-x_max, y_up=True + ) + self.register_buffer("grid_xz", grid_xz, persistent=False) + + def grid_to_polar(self, cam): + f, c = cam.f[..., 0][..., None, None], cam.c[..., 0][..., None, None] + u = from_homogeneous(self.grid_xz).squeeze(-1) * f + c + z_idx = (self.grid_xz[..., 1] - self.z_min) / self.Δ # convert z value to index + z_idx = z_idx[None].expand_as(u) + grid_polar = torch.stack([u, z_idx], -1) + return grid_polar + + def sample_from_polar(self, image_polar, valid_polar, grid_uz): + size = grid_uz.new_tensor(image_polar.shape[-2:][::-1]) + grid_uz_norm = (grid_uz + 0.5) / size * 2 - 1 + grid_uz_norm = grid_uz_norm * grid_uz.new_tensor([1, -1]) # y axis is up + image_bev = grid_sample(image_polar, grid_uz_norm, align_corners=False) + + if valid_polar is None: + valid = torch.ones_like(image_polar[..., :1, :, :]) + else: + valid = valid_polar.to(image_polar)[:, None] + valid = grid_sample(valid, grid_uz_norm, align_corners=False) + valid = valid.squeeze(1) > (1 - 1e-4) + + return image_bev, valid + + def forward(self, image_polar, valid_polar, cam): + grid_uz = self.grid_to_polar(cam) + image, valid = self.sample_from_polar(image_polar, valid_polar, grid_uz) + return image, valid, grid_uz diff --git a/models/feature_extractor.py b/models/feature_extractor.py new file mode 100644 index 0000000000000000000000000000000000000000..1729f4668d9f983ccbad6db2d4faa98e9a4f0915 --- /dev/null +++ b/models/feature_extractor.py @@ -0,0 +1,232 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. 
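`CartesianProjection.grid_to_polar` maps each BEV cell (x, z) to a polar sample location (u, z-index) through the pinhole relation u = f·x/z + c, which `sample_from_polar` then feeds to `grid_sample`. A small numeric sketch of that mapping with made-up intrinsics and grid resolution (not the repo API, just the arithmetic):

```python
import torch

f, c = 256.0, 128.0    # illustrative focal length and principal point (pixels)
ppm = 2.0              # pixels per meter of the BEV grid
delta = 1.0 / ppm      # BEV cell size in meters (the Δ above)
z_min = delta

# A few BEV cells: x to the right, z forward, both in meters.
grid_xz = torch.tensor([[-2.0, 4.0], [0.0, 4.0], [2.0, 8.0]])
x, z = grid_xz[:, 0], grid_xz[:, 1]

u = f * (x / z) + c            # image column of each BEV cell (from_homogeneous * f + c)
z_idx = (z - z_min) / delta    # index along the polar depth axis

print(torch.stack([u, z_idx], -1))
# tensor([[  0.,   7.],
#         [128.,   7.],
#         [192.,  15.]])
```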
+ +# Adapted from PixLoc, Paul-Edouard Sarlin, ETH Zurich +# https://github.com/cvg/pixloc +# Released under the Apache License 2.0 + +""" +Flexible UNet model which takes any Torchvision backbone as encoder. +Predicts multi-level feature and makes sure that they are well aligned. +""" + +import torch +import torch.nn as nn +import torchvision + +from .base import BaseModel +from .utils import checkpointed + + +class DecoderBlock(nn.Module): + def __init__( + self, previous, skip, out, num_convs=1, norm=nn.BatchNorm2d, padding="zeros" + ): + super().__init__() + + self.upsample = nn.Upsample( + scale_factor=2, mode="bilinear", align_corners=False + ) + + layers = [] + for i in range(num_convs): + conv = nn.Conv2d( + previous + skip if i == 0 else out, + out, + kernel_size=3, + padding=1, + bias=norm is None, + padding_mode=padding, + ) + layers.append(conv) + if norm is not None: + layers.append(norm(out)) + layers.append(nn.ReLU(inplace=True)) + self.layers = nn.Sequential(*layers) + + def forward(self, previous, skip): + upsampled = self.upsample(previous) + # If the shape of the input map `skip` is not a multiple of 2, + # it will not match the shape of the upsampled map `upsampled`. + # If the downsampling uses ceil_mode=False, we nedd to crop `skip`. + # If it uses ceil_mode=True (not supported here), we should pad it. + _, _, hu, wu = upsampled.shape + _, _, hs, ws = skip.shape + assert (hu <= hs) and (wu <= ws), "Using ceil_mode=True in pooling?" + # assert (hu == hs) and (wu == ws), 'Careful about padding' + skip = skip[:, :, :hu, :wu] + return self.layers(torch.cat([upsampled, skip], dim=1)) + + +class AdaptationBlock(nn.Sequential): + def __init__(self, inp, out): + conv = nn.Conv2d(inp, out, kernel_size=1, padding=0, bias=True) + super().__init__(conv) + + +class FeatureExtractor(BaseModel): + default_conf = { + "pretrained": True, + "input_dim": 3, + "output_scales": [0, 2, 4], # what scales to adapt and output + "output_dim": 128, # # of channels in output feature maps + "encoder": "vgg16", # string (torchvision net) or list of channels + "num_downsample": 4, # how many downsample block (if VGG-style net) + "decoder": [64, 64, 64, 64], # list of channels of decoder + "decoder_norm": "nn.BatchNorm2d", # normalization ind decoder blocks + "do_average_pooling": False, + "checkpointed": False, # whether to use gradient checkpointing + "padding": "zeros" + } + mean = [0.485, 0.456, 0.406] + std = [0.229, 0.224, 0.225] + + def build_encoder(self, conf): + assert isinstance(conf.encoder, str) + if conf.pretrained: + assert conf.input_dim == 3 + Encoder = getattr(torchvision.models, conf.encoder) + encoder = Encoder(weights="DEFAULT" if conf.pretrained else None) + Block = checkpointed(torch.nn.Sequential, do=conf.checkpointed) + assert max(conf.output_scales) <= conf.num_downsample + + if conf.encoder.startswith("vgg"): + # Parse the layers and pack them into downsampling blocks + # It's easy for VGG-style nets because of their linear structure. 
+ # This does not handle strided convs and residual connections + skip_dims = [] + previous_dim = None + blocks = [[]] + for i, layer in enumerate(encoder.features): + if isinstance(layer, torch.nn.Conv2d): + # Change the first conv layer if the input dim mismatches + if i == 0 and conf.input_dim != layer.in_channels: + args = {k: getattr(layer, k) for k in layer.__constants__} + args.pop("output_padding") + layer = torch.nn.Conv2d( + **{**args, "in_channels": conf.input_dim} + ) + previous_dim = layer.out_channels + elif isinstance(layer, torch.nn.MaxPool2d): + assert previous_dim is not None + skip_dims.append(previous_dim) + if (conf.num_downsample + 1) == len(blocks): + break + blocks.append([]) # start a new block + if conf.do_average_pooling: + assert layer.dilation == 1 + layer = torch.nn.AvgPool2d( + kernel_size=layer.kernel_size, + stride=layer.stride, + padding=layer.padding, + ceil_mode=layer.ceil_mode, + count_include_pad=False, + ) + blocks[-1].append(layer) + encoder = [Block(*b) for b in blocks] + elif conf.encoder.startswith("resnet"): + # Manually define the ResNet blocks such that the downsampling comes first + assert conf.encoder[len("resnet") :] in ["18", "34", "50", "101"] + assert conf.input_dim == 3, "Unsupported for now." + block1 = torch.nn.Sequential(encoder.conv1, encoder.bn1, encoder.relu) + block2 = torch.nn.Sequential(encoder.maxpool, encoder.layer1) + block3 = encoder.layer2 + block4 = encoder.layer3 + block5 = encoder.layer4 + blocks = [block1, block2, block3, block4, block5] + # Extract the output dimension of each block + skip_dims = [encoder.conv1.out_channels] + for i in range(1, 5): + modules = getattr(encoder, f"layer{i}")[-1]._modules + conv = sorted(k for k in modules if k.startswith("conv"))[-1] + skip_dims.append(modules[conv].out_channels) + # Add a dummy block such that the first one does not downsample + encoder = [torch.nn.Identity()] + [Block(b) for b in blocks] + skip_dims = [3] + skip_dims + # Trim based on the requested encoder size + encoder = encoder[: conf.num_downsample + 1] + skip_dims = skip_dims[: conf.num_downsample + 1] + else: + raise NotImplementedError(conf.encoder) + + assert (conf.num_downsample + 1) == len(encoder) + encoder = nn.ModuleList(encoder) + + return encoder, skip_dims + + def _init(self, conf): + # Encoder + self.encoder, skip_dims = self.build_encoder(conf) + self.skip_dims = skip_dims + + def update_padding(module): + if isinstance(module, nn.Conv2d): + module.padding_mode = conf.padding + + if conf.padding != "zeros": + self.encoder.apply(update_padding) + + # Decoder + if conf.decoder is not None: + assert len(conf.decoder) == (len(skip_dims) - 1) + Block = checkpointed(DecoderBlock, do=conf.checkpointed) + norm = eval(conf.decoder_norm) if conf.decoder_norm else None # noqa + + previous = skip_dims[-1] + decoder = [] + for out, skip in zip(conf.decoder, skip_dims[:-1][::-1]): + decoder.append( + Block(previous, skip, out, norm=norm, padding=conf.padding) + ) + previous = out + self.decoder = nn.ModuleList(decoder) + + # Adaptation layers + adaptation = [] + for idx, i in enumerate(conf.output_scales): + if conf.decoder is None or i == (len(self.encoder) - 1): + input_ = skip_dims[i] + else: + input_ = conf.decoder[-1 - i] + + # out_dim can be an int (same for all scales) or a list (per scale) + dim = conf.output_dim + if not isinstance(dim, int): + dim = dim[idx] + + block = AdaptationBlock(input_, dim) + adaptation.append(block) + self.adaptation = nn.ModuleList(adaptation) + self.scales = [2**s for s in 
conf.output_scales] + + def _forward(self, data): + image = data["image"] + if self.conf.pretrained: + mean, std = image.new_tensor(self.mean), image.new_tensor(self.std) + image = (image - mean[:, None, None]) / std[:, None, None] + + skip_features = [] + features = image + for block in self.encoder: + # print("block features",block) + features = block(features) + skip_features.append(features) + + if self.conf.decoder: + pre_features = [skip_features[-1]] + for block, skip in zip(self.decoder, skip_features[:-1][::-1]): + pre_features.append(block(pre_features[-1], skip)) + pre_features = pre_features[::-1] # fine to coarse + else: + pre_features = skip_features + + out_features = [] + for adapt, i in zip(self.adaptation, self.conf.output_scales): + out_features.append(adapt(pre_features[i])) + pred = {"feature_maps": out_features, "skip_features": skip_features} + return pred + + def loss(self, pred, data): + raise NotImplementedError + + def metrics(self, pred, data): + raise NotImplementedError diff --git a/models/feature_extractor_v2.py b/models/feature_extractor_v2.py new file mode 100644 index 0000000000000000000000000000000000000000..ee238c8aa12cc0dd47431dea87e28a96622a9128 --- /dev/null +++ b/models/feature_extractor_v2.py @@ -0,0 +1,194 @@ +import logging + +import numpy as np +import torch +import torch.nn as nn +import torchvision +from torchvision.models.feature_extraction import create_feature_extractor + +from .base import BaseModel + +logger = logging.getLogger(__name__) + + +class DecoderBlock(nn.Module): + def __init__( + self, previous, out, ksize=3, num_convs=1, norm=nn.BatchNorm2d, padding="zeros" + ): + super().__init__() + layers = [] + for i in range(num_convs): + conv = nn.Conv2d( + previous if i == 0 else out, + out, + kernel_size=ksize, + padding=ksize // 2, + bias=norm is None, + padding_mode=padding, + ) + layers.append(conv) + if norm is not None: + layers.append(norm(out)) + layers.append(nn.ReLU(inplace=True)) + self.layers = nn.Sequential(*layers) + + def forward(self, previous, skip): + _, _, hp, wp = previous.shape + _, _, hs, ws = skip.shape + scale = 2 ** np.round(np.log2(np.array([hs / hp, ws / wp]))) + upsampled = nn.functional.interpolate( + previous, scale_factor=scale.tolist(), mode="bilinear", align_corners=False + ) + # If the shape of the input map `skip` is not a multiple of 2, + # it will not match the shape of the upsampled map `upsampled`. + # If the downsampling uses ceil_mode=False, we nedd to crop `skip`. + # If it uses ceil_mode=True (not supported here), we should pad it. 
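The flexible UNet in `models/feature_extractor.py` returns one adapted feature map per entry of `output_scales`. A shape-check sketch with the VGG-16 defaults, assuming the repository root is on `PYTHONPATH`; `pretrained` is disabled only so the sketch runs without downloading weights:

```python
import torch
from models.feature_extractor import FeatureExtractor  # assumes repo root on PYTHONPATH

extractor = FeatureExtractor({
    "encoder": "vgg16",
    "pretrained": False,          # only to keep the sketch download-free
    "num_downsample": 4,
    "decoder": [64, 64, 64, 64],
    "output_scales": [0, 2, 4],
    "output_dim": 128,
})

pred = extractor({"image": torch.rand(1, 3, 256, 256)})
for scale, fmap in zip(extractor.scales, pred["feature_maps"]):
    print(scale, tuple(fmap.shape))
# 1  (1, 128, 256, 256)
# 4  (1, 128, 64, 64)
# 16 (1, 128, 16, 16)
```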
+ _, _, hu, wu = upsampled.shape + _, _, hs, ws = skip.shape + if (hu <= hs) and (wu <= ws): + skip = skip[:, :, :hu, :wu] + elif (hu >= hs) and (wu >= ws): + skip = nn.functional.pad(skip, [0, wu - ws, 0, hu - hs]) + else: + raise ValueError( + f"Inconsistent skip vs upsampled shapes: {(hs, ws)}, {(hu, wu)}" + ) + + return self.layers(skip) + upsampled + + +class FPN(nn.Module): + def __init__(self, in_channels_list, out_channels, **kw): + super().__init__() + self.first = nn.Conv2d( + in_channels_list[-1], out_channels, 1, padding=0, bias=True + ) + self.blocks = nn.ModuleList( + [ + DecoderBlock(c, out_channels, ksize=1, **kw) + for c in in_channels_list[::-1][1:] + ] + ) + self.out = nn.Sequential( + nn.Conv2d(out_channels, out_channels, 3, padding=1, bias=False), + nn.BatchNorm2d(out_channels), + nn.ReLU(inplace=True), + ) + + def forward(self, layers): + feats = None + for idx, x in enumerate(reversed(layers.values())): + if feats is None: + feats = self.first(x) + else: + feats = self.blocks[idx - 1](feats, x) + out = self.out(feats) + return out + + +def remove_conv_stride(conv): + conv_new = nn.Conv2d( + conv.in_channels, + conv.out_channels, + conv.kernel_size, + bias=conv.bias is not None, + stride=1, + padding=conv.padding, + ) + conv_new.weight = conv.weight + conv_new.bias = conv.bias + return conv_new + + +class FeatureExtractor(BaseModel): + default_conf = { + "pretrained": True, + "input_dim": 3, + "output_dim": 128, # # of channels in output feature maps + "encoder": "resnet50", # torchvision net as string + "remove_stride_from_first_conv": False, + "num_downsample": None, # how many downsample block + "decoder_norm": "nn.BatchNorm2d", # normalization ind decoder blocks + "do_average_pooling": False, + "checkpointed": False, # whether to use gradient checkpointing + } + mean = [0.485, 0.456, 0.406] + std = [0.229, 0.224, 0.225] + + def build_encoder(self, conf): + assert isinstance(conf.encoder, str) + if conf.pretrained: + assert conf.input_dim == 3 + Encoder = getattr(torchvision.models, conf.encoder) + + kw = {} + if conf.encoder.startswith("resnet"): + layers = ["relu", "layer1", "layer2", "layer3", "layer4"] + kw["replace_stride_with_dilation"] = [False, False, False] + elif conf.encoder == "vgg13": + layers = [ + "features.3", + "features.8", + "features.13", + "features.18", + "features.23", + ] + elif conf.encoder == "vgg16": + layers = [ + "features.3", + "features.8", + "features.15", + "features.22", + "features.29", + ] + else: + raise NotImplementedError(conf.encoder) + + if conf.num_downsample is not None: + layers = layers[: conf.num_downsample] + encoder = Encoder(weights="DEFAULT" if conf.pretrained else None, **kw) + encoder = create_feature_extractor(encoder, return_nodes=layers) + if conf.encoder.startswith("resnet") and conf.remove_stride_from_first_conv: + encoder.conv1 = remove_conv_stride(encoder.conv1) + + if conf.do_average_pooling: + raise NotImplementedError + if conf.checkpointed: + raise NotImplementedError + + return encoder, layers + + def _init(self, conf): + # Preprocessing + self.register_buffer("mean_", torch.tensor(self.mean), persistent=False) + self.register_buffer("std_", torch.tensor(self.std), persistent=False) + + # Encoder + self.encoder, self.layers = self.build_encoder(conf) + s = 128 + inp = torch.zeros(1, 3, s, s) + features = list(self.encoder(inp).values()) + self.skip_dims = [x.shape[1] for x in features] + self.layer_strides = [s / f.shape[-1] for f in features] + self.scales = [self.layer_strides[0]] + + # Decoder + 
norm = eval(conf.decoder_norm) if conf.decoder_norm else None # noqa + self.decoder = FPN(self.skip_dims, out_channels=conf.output_dim, norm=norm) + + logger.debug( + "Built feature extractor with layers {name:dim:stride}:\n" + f"{list(zip(self.layers, self.skip_dims, self.layer_strides))}\n" + f"and output scales {self.scales}." + ) + + def _forward(self, data): + image = data["image"] + image = (image - self.mean_[:, None, None]) / self.std_[:, None, None] + + skip_features = self.encoder(image) + output = self.decoder(skip_features) + pred = {"feature_maps": [output], "skip_features": skip_features} + return pred +if __name__ == '__main__': + pass \ No newline at end of file diff --git a/models/feature_extractor_v3.py b/models/feature_extractor_v3.py new file mode 100644 index 0000000000000000000000000000000000000000..3ab0d21491629639e37c6e48c156ef58fa0d8c37 --- /dev/null +++ b/models/feature_extractor_v3.py @@ -0,0 +1,214 @@ +import logging + +import numpy as np +import torch +import torch.nn as nn +import torchvision +from torchvision.models.feature_extraction import create_feature_extractor + +from .base import BaseModel + +logger = logging.getLogger(__name__) + + +class DecoderBlock(nn.Module): + def __init__( + self, previous, out, ksize=3, num_convs=1, norm=nn.BatchNorm2d, padding="zeros" + ): + super().__init__() + layers = [] + for i in range(num_convs): + conv = nn.Conv2d( + previous if i == 0 else out, + out, + kernel_size=ksize, + padding=ksize // 2, + bias=norm is None, + padding_mode=padding, + ) + layers.append(conv) + if norm is not None: + layers.append(norm(out)) + layers.append(nn.ReLU(inplace=True)) + self.layers = nn.Sequential(*layers) + + def forward(self, previous, skip): + _, _, hp, wp = previous.shape + _, _, hs, ws = skip.shape + scale = 2 ** np.round(np.log2(np.array([hs / hp, ws / wp]))) + upsampled = nn.functional.interpolate( + previous, scale_factor=scale.tolist(), mode="bilinear", align_corners=False + ) + # If the shape of the input map `skip` is not a multiple of 2, + # it will not match the shape of the upsampled map `upsampled`. + # If the downsampling uses ceil_mode=False, we nedd to crop `skip`. + # If it uses ceil_mode=True (not supported here), we should pad it. 
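Unlike the multi-scale UNet above, the v2 extractor fuses all ResNet stages through the `FPN` decoder into a single map at the stride of the finest extracted layer. A quick shape check under the same assumptions (repo root importable, `pretrained` off only to avoid a weight download):

```python
import torch
from models.feature_extractor_v2 import FeatureExtractor  # assumes repo root on PYTHONPATH

extractor = FeatureExtractor({
    "encoder": "resnet50",
    "pretrained": False,   # only to keep the sketch download-free
    "output_dim": 8,
})

pred = extractor({"image": torch.rand(1, 3, 256, 256)})
fused = pred["feature_maps"][0]
print(extractor.scales, tuple(fused.shape))
# [2.0] (1, 8, 128, 128): one fused FPN map at the stride of the finest ResNet layer
```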
+ _, _, hu, wu = upsampled.shape + _, _, hs, ws = skip.shape + if (hu <= hs) and (wu <= ws): + skip = skip[:, :, :hu, :wu] + elif (hu >= hs) and (wu >= ws): + skip = nn.functional.pad(skip, [0, wu - ws, 0, hu - hs]) + else: + raise ValueError( + f"Inconsistent skip vs upsampled shapes: {(hs, ws)}, {(hu, wu)}" + ) + + return self.layers(skip) + upsampled + + +class FPN(nn.Module): + def __init__(self, in_channels_list, out_channels, **kw): + super().__init__() + self.first = nn.Conv2d( + in_channels_list[-1], out_channels, 1, padding=0, bias=True + ) + self.blocks = nn.ModuleList( + [ + DecoderBlock(c, out_channels, ksize=1, **kw) + for c in in_channels_list[::-1][1:] + ] + ) + self.out = nn.Sequential( + nn.Conv2d(out_channels, out_channels, 3, padding=1, bias=False), + nn.BatchNorm2d(out_channels), + nn.ReLU(inplace=True), + ) + + def forward(self, layers): + feats = None + for idx, x in enumerate(reversed(layers.values())): + if feats is None: + feats = self.first(x) + else: + feats = self.blocks[idx - 1](feats, x) + out = self.out(feats) + return out + + +def remove_conv_stride(conv): + conv_new = nn.Conv2d( + conv.in_channels, + conv.out_channels, + conv.kernel_size, + bias=conv.bias is not None, + stride=1, + padding=conv.padding, + ) + conv_new.weight = conv.weight + conv_new.bias = conv.bias + return conv_new + +class DoubleConv(nn.Module): + def __init__(self, in_channels, out_channels): + super(DoubleConv, self).__init__() + self.double_conv = nn.Sequential( + nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1), + nn.ReLU(inplace=True), + nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1), + nn.ReLU(inplace=True) + ) + + def forward(self, x): + return self.double_conv(x) + +class FeatureExtractor(BaseModel): + default_conf = { + "pretrained": True, + "input_dim": 3, + "output_dim": 128, # # of channels in output feature maps + "encoder": "resnet50", # torchvision net as string + "remove_stride_from_first_conv": False, + "num_downsample": None, # how many downsample block + "decoder_norm": "nn.BatchNorm2d", # normalization ind decoder blocks + "do_average_pooling": False, + "checkpointed": False, # whether to use gradient checkpointing + } + mean = [0.485, 0.456, 0.406] + std = [0.229, 0.224, 0.225] + + def build_encoder(self, conf): + assert isinstance(conf.encoder, str) + if conf.pretrained: + assert conf.input_dim == 3 + # Encoder + self.conv1 = self.conv_block(conf.input_dim, 64) + self.pool1 = nn.MaxPool2d(2, 2) + self.conv2 = self.conv_block(64, 128) + self.pool2 = nn.MaxPool2d(2, 2) + self.conv3 = self.conv_block(128, 256) + self.pool3 = nn.MaxPool2d(2, 2) + self.conv4 = self.conv_block(256, 512) + self.pool4 = nn.MaxPool2d(2, 2) + self.conv5 = self.conv_block(512, 1024) + + # Decoder + self.up6 = nn.ConvTranspose2d(1024, 512, 2, stride=2) + self.conv6 = self.conv_block(1024, 512) + self.up7 = nn.ConvTranspose2d(512, 256, 2, stride=2) + self.conv7 = self.conv_block(512, 256) + self.up8 = nn.ConvTranspose2d(256, 128, 2, stride=2) + self.conv8 = self.conv_block(256, 128) + self.up9 = nn.ConvTranspose2d(128, 64, 2, stride=2) + self.conv9 = self.conv_block(128, 64) + self.conv10 = nn.Conv2d(64, conf.output_dim, 1) + + # return encoder, layers + def unet(self,x): + # Encoder + conv1 = self.conv1(x) + pool1 = self.pool1(conv1) + conv2 = self.conv2(pool1) + pool2 = self.pool2(conv2) + conv3 = self.conv3(pool2) + pool3 = self.pool3(conv3) + conv4 = self.conv4(pool3) + pool4 = self.pool4(conv4) + conv5 = self.conv5(pool4) + + # Decoder + up6 = self.up6(conv5) 
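Each decoder step of this UNet (continued just below) transposes-convolves the previous feature to half the channels, concatenates the matching encoder feature, and convolves back down. A self-contained sketch of one such step with illustrative sizes; the plain `Conv2d + ReLU` pair stands in for the batch-normalized `conv_block` used here:

```python
import torch
import torch.nn as nn

# One UNet decoder step: 1024 -> 512 channels via transposed conv, concatenate the
# matching 512-channel encoder feature, then convolve the 1024 channels back to 512.
up6 = nn.ConvTranspose2d(1024, 512, 2, stride=2)
conv6 = nn.Sequential(nn.Conv2d(1024, 512, 3, padding=1), nn.ReLU(inplace=True))

conv5 = torch.randn(1, 1024, 16, 16)   # bottleneck feature (illustrative size)
conv4 = torch.randn(1, 512, 32, 32)    # matching encoder feature

x = up6(conv5)                    # (1, 512, 32, 32)
x = torch.cat([x, conv4], dim=1)  # (1, 1024, 32, 32)
x = conv6(x)                      # (1, 512, 32, 32)
print(x.shape)
```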
+ concat6 = torch.cat([up6, conv4], dim=1) + conv6 = self.conv6(concat6) + up7 = self.up7(conv6) + concat7 = torch.cat([up7, conv3], dim=1) + conv7 = self.conv7(concat7) + up8 = self.up8(conv7) + concat8 = torch.cat([up8, conv2], dim=1) + conv8 = self.conv8(concat8) + up9 = self.up9(conv8) + concat9 = torch.cat([up9, conv1], dim=1) + conv9 = self.conv9(concat9) + output = self.conv10(conv9) + + return output + + def conv_block(self, in_channels, out_channels): + return nn.Sequential( + nn.Conv2d(in_channels, out_channels, 3, padding=1), + nn.BatchNorm2d(out_channels), + nn.ReLU(inplace=True), + nn.Conv2d(out_channels, out_channels, 3, padding=1), + nn.BatchNorm2d(out_channels), + nn.ReLU(inplace=True) + ) + def _init(self, conf): + # Preprocessing + self.register_buffer("mean_", torch.tensor(self.mean), persistent=False) + self.register_buffer("std_", torch.tensor(self.std), persistent=False) + + # Encoder + self.build_encoder(conf) + + + def _forward(self, data): + image = data["image"] + image = (image - self.mean_[:, None, None]) / self.std_[:, None, None] + + output = self.unet(image) + # output = self.decoder(skip_features) + + pred = {"feature_maps": [output]} + return pred +if __name__ == '__main__': + model=FeatureExtractor() \ No newline at end of file diff --git a/models/feature_extractor_v4.py b/models/feature_extractor_v4.py new file mode 100644 index 0000000000000000000000000000000000000000..9efab6ab751f09cc78d084dd54804dbc1a9364a6 --- /dev/null +++ b/models/feature_extractor_v4.py @@ -0,0 +1,100 @@ +import logging + +import numpy as np +import torch +import torch.nn as nn +import torchvision +from torchvision.models.feature_extraction import create_feature_extractor +import feature_extractor_models as smp +import torch +from .base import BaseModel + +logger = logging.getLogger(__name__) + + + +class FeatureExtractor(BaseModel): + default_conf = { + "pretrained": True, + "input_dim": 3, + "output_dim": 128, # # of channels in output feature maps + "encoder": "resnet50", # torchvision net as string + "remove_stride_from_first_conv": False, + "num_downsample": None, # how many downsample block + "decoder_norm": "nn.BatchNorm2d", # normalization ind decoder blocks + "do_average_pooling": False, + "checkpointed": False, # whether to use gradient checkpointing + "architecture":"FPN" + } + mean = [0.485, 0.456, 0.406] + std = [0.229, 0.224, 0.225] + # self.fmodel=None + + def build_encoder(self, conf): + assert isinstance(conf.encoder, str) + if conf.pretrained: + assert conf.input_dim == 3 + + + # return encoder, layers + + + + def _init(self, conf): + # Preprocessing + self.register_buffer("mean_", torch.tensor(self.mean), persistent=False) + self.register_buffer("std_", torch.tensor(self.std), persistent=False) + + if conf.architecture=="FPN": + # Encoder + self.fmodel = smp.FPN( + encoder_name=conf.encoder, # choose encoder, e.g. mobilenet_v2 or efficientnet-b7 + encoder_weights="imagenet", # use `imagenet` pre-trained weights for encoder initialization + in_channels=conf.input_dim, # model input channels (1 for gray-scale images, 3 for RGB, etc.) + classes=conf.output_dim, # model output channels (number of classes in your dataset) + upsampling=2, # optional, final output upsampling, default is 8 + activation=None + ) + elif conf.architecture == "LightFPN": + self.fmodel = smp.L( + encoder_name=conf.encoder, # choose encoder, e.g. 
mobilenet_v2 or efficientnet-b7 + encoder_weights="imagenet", # use `imagenet` pre-trained weights for encoder initialization + in_channels=conf.input_dim, # model input channels (1 for gray-scale images, 3 for RGB, etc.) + classes=conf.output_dim, # model output channels (number of classes in your dataset) + upsampling=2, # optional, final output upsampling, default is 8 + activation=None + ) + elif conf.architecture=="PSP": + + self.fmodel =smp.PSPNet( + encoder_name=conf.encoder, # choose encoder, e.g. mobilenet_v2 or efficientnet-b7 + encoder_weights="imagenet", # use `imagenet` pre-trained weights for encoder initialization + in_channels=conf.input_dim, # model input channels (1 for gray-scale images, 3 for RGB, etc.) + classes=conf.output_dim, # model output channels (number of classes in your dataset) + upsampling=4, # optional, final output upsampling, default is 8 + activation=None + ) + else: + raise ValueError("Only FPN") + # elif conf.architecture=="Unet": + # self.fmodel = smp.FPN( + # encoder_name=conf.encoder, # choose encoder, e.g. mobilenet_v2 or efficientnet-b7 + # encoder_weights="imagenet", # use `imagenet` pre-trained weights for encoder initialization + # in_channels=conf.input_dim, # model input channels (1 for gray-scale images, 3 for RGB, etc.) + # classes=conf.output_dim, # model output channels (number of classes in your dataset) + # # upsampling=int(conf.upsampling), # optional, final output upsampling, default is 8 + # activation="relu" + # ) + + + def _forward(self, data): + image = data["image"] + image = (image - self.mean_[:, None, None]) / self.std_[:, None, None] + + output = self.fmodel(image) + # output = self.decoder(skip_features) + + pred = {"feature_maps": [output]} + return pred +if __name__ == '__main__': + model=FeatureExtractor() \ No newline at end of file diff --git a/models/feature_extractor_v5.py b/models/feature_extractor_v5.py new file mode 100644 index 0000000000000000000000000000000000000000..1c6c41ad420c471be6a5e36cc0eb3fcc2e188792 --- /dev/null +++ b/models/feature_extractor_v5.py @@ -0,0 +1,423 @@ +import logging + +import numpy as np +import torch +import torch.nn as nn +import torchvision +from torchvision.models.feature_extraction import create_feature_extractor +import feature_extractor_models as smp +import torch +from .base import BaseModel + +logger = logging.getLogger(__name__) + +import math +import torch +import numpy as np +import torch.nn as nn +import torch.nn.functional as F +from torch.nn import init +from collections import OrderedDict + +import torch.distributed as dist + + +def get_batch_norm(inplace=False): + if dist.is_available() and dist.is_initialized(): # 检查是否在分布式环境中 + return nn.SyncBatchNorm + else: + return nn.BatchNorm2d + + +BatchNorm2d = get_batch_norm() +bn_mom = 0.1 + + +def conv3x3(in_planes, out_planes, stride=1): + """3x3 convolution with padding""" + return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, + padding=1, bias=False) + + +class BasicBlock(nn.Module): + expansion = 1 + + def __init__(self, inplanes, planes, stride=1, downsample=None, no_relu=False): + super(BasicBlock, self).__init__() + self.conv1 = conv3x3(inplanes, planes, stride) + self.bn1 = BatchNorm2d(planes, momentum=bn_mom) + self.relu = nn.ReLU(inplace=True) + self.conv2 = conv3x3(planes, planes) + self.bn2 = BatchNorm2d(planes, momentum=bn_mom) + self.downsample = downsample + self.stride = stride + self.no_relu = no_relu + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out 
= self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + + if self.no_relu: + return out + else: + return self.relu(out) + + +class Bottleneck(nn.Module): + expansion = 2 + + def __init__(self, inplanes, planes, stride=1, downsample=None, no_relu=True): + super(Bottleneck, self).__init__() + self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) + self.bn1 = BatchNorm2d(planes, momentum=bn_mom) + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, + padding=1, bias=False) + self.bn2 = BatchNorm2d(planes, momentum=bn_mom) + self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, + bias=False) + self.bn3 = BatchNorm2d(planes * self.expansion, momentum=bn_mom) + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + self.no_relu = no_relu + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + if self.no_relu: + return out + else: + return self.relu(out) + + +class DAPPM(nn.Module): + def __init__(self, inplanes, branch_planes, outplanes): + super(DAPPM, self).__init__() + self.scale1 = nn.Sequential(nn.AvgPool2d(kernel_size=5, stride=2, padding=2), + BatchNorm2d(inplanes, momentum=bn_mom), + nn.ReLU(inplace=True), + nn.Conv2d(inplanes, branch_planes, kernel_size=1, bias=False), + ) + self.scale2 = nn.Sequential(nn.AvgPool2d(kernel_size=9, stride=4, padding=4), + BatchNorm2d(inplanes, momentum=bn_mom), + nn.ReLU(inplace=True), + nn.Conv2d(inplanes, branch_planes, kernel_size=1, bias=False), + ) + self.scale3 = nn.Sequential(nn.AvgPool2d(kernel_size=17, stride=8, padding=8), + BatchNorm2d(inplanes, momentum=bn_mom), + nn.ReLU(inplace=True), + nn.Conv2d(inplanes, branch_planes, kernel_size=1, bias=False), + ) + self.scale4 = nn.Sequential(nn.AdaptiveAvgPool2d((1, 1)), + BatchNorm2d(inplanes, momentum=bn_mom), + nn.ReLU(inplace=True), + nn.Conv2d(inplanes, branch_planes, kernel_size=1, bias=False), + ) + self.scale0 = nn.Sequential( + BatchNorm2d(inplanes, momentum=bn_mom), + nn.ReLU(inplace=True), + nn.Conv2d(inplanes, branch_planes, kernel_size=1, bias=False), + ) + self.process1 = nn.Sequential( + BatchNorm2d(branch_planes, momentum=bn_mom), + nn.ReLU(inplace=True), + nn.Conv2d(branch_planes, branch_planes, kernel_size=3, padding=1, bias=False), + ) + self.process2 = nn.Sequential( + BatchNorm2d(branch_planes, momentum=bn_mom), + nn.ReLU(inplace=True), + nn.Conv2d(branch_planes, branch_planes, kernel_size=3, padding=1, bias=False), + ) + self.process3 = nn.Sequential( + BatchNorm2d(branch_planes, momentum=bn_mom), + nn.ReLU(inplace=True), + nn.Conv2d(branch_planes, branch_planes, kernel_size=3, padding=1, bias=False), + ) + self.process4 = nn.Sequential( + BatchNorm2d(branch_planes, momentum=bn_mom), + nn.ReLU(inplace=True), + nn.Conv2d(branch_planes, branch_planes, kernel_size=3, padding=1, bias=False), + ) + self.compression = nn.Sequential( + BatchNorm2d(branch_planes * 5, momentum=bn_mom), + nn.ReLU(inplace=True), + nn.Conv2d(branch_planes * 5, outplanes, kernel_size=1, bias=False), + ) + self.shortcut = nn.Sequential( + BatchNorm2d(inplanes, momentum=bn_mom), + nn.ReLU(inplace=True), + nn.Conv2d(inplanes, outplanes, kernel_size=1, bias=False), + ) + + def 
forward(self, x): + # x = self.downsample(x) + width = x.shape[-1] + height = x.shape[-2] + x_list = [] + + x_list.append(self.scale0(x)) + x_list.append(self.process1((F.interpolate(self.scale1(x), + size=[height, width], + mode='bilinear') + x_list[0]))) + x_list.append((self.process2((F.interpolate(self.scale2(x), + size=[height, width], + mode='bilinear') + x_list[1])))) + x_list.append(self.process3((F.interpolate(self.scale3(x), + size=[height, width], + mode='bilinear') + x_list[2]))) + x_list.append(self.process4((F.interpolate(self.scale4(x), + size=[height, width], + mode='bilinear') + x_list[3]))) + + out = self.compression(torch.cat(x_list, 1)) + self.shortcut(x) + return out + + +class segmenthead(nn.Module): + + def __init__(self, inplanes, interplanes, outplanes, scale_factor=None): + super(segmenthead, self).__init__() + self.bn1 = BatchNorm2d(inplanes, momentum=bn_mom) + self.conv1 = nn.Conv2d(inplanes, interplanes, kernel_size=3, padding=1, bias=False) + self.bn2 = BatchNorm2d(interplanes, momentum=bn_mom) + self.relu = nn.ReLU(inplace=True) + self.conv2 = nn.Conv2d(interplanes, outplanes, kernel_size=1, padding=0, bias=True) + self.scale_factor = scale_factor + + def forward(self, x): + x = self.conv1(self.relu(self.bn1(x))) + out = self.conv2(self.relu(self.bn2(x))) + + if self.scale_factor is not None: + height = x.shape[-2] * self.scale_factor + width = x.shape[-1] * self.scale_factor + out = F.interpolate(out, + size=[height, width], + mode='bilinear') + + return out + + +class DualResNet(nn.Module): + + def __init__(self, block, layers, num_classes=19, planes=64, spp_planes=128, head_planes=128, augment=False): + super(DualResNet, self).__init__() + + highres_planes = planes * 2 + self.augment = augment + + self.conv1 = nn.Sequential( + nn.Conv2d(3, planes, kernel_size=3, stride=2, padding=1), + BatchNorm2d(planes, momentum=bn_mom), + nn.ReLU(inplace=True), + nn.Conv2d(planes, planes, kernel_size=3, stride=2, padding=1), + BatchNorm2d(planes, momentum=bn_mom), + nn.ReLU(inplace=True), + ) + + self.relu = nn.ReLU(inplace=False) + self.layer1 = self._make_layer(block, planes, planes, layers[0]) + self.layer2 = self._make_layer(block, planes, planes * 2, layers[1], stride=2) + self.layer3 = self._make_layer(block, planes * 2, planes * 4, layers[2], stride=2) + self.layer4 = self._make_layer(block, planes * 4, planes * 8, layers[3], stride=2) + + self.compression3 = nn.Sequential( + nn.Conv2d(planes * 4, highres_planes, kernel_size=1, bias=False), + BatchNorm2d(highres_planes, momentum=bn_mom), + ) + + self.compression4 = nn.Sequential( + nn.Conv2d(planes * 8, highres_planes, kernel_size=1, bias=False), + BatchNorm2d(highres_planes, momentum=bn_mom), + ) + + self.down3 = nn.Sequential( + nn.Conv2d(highres_planes, planes * 4, kernel_size=3, stride=2, padding=1, bias=False), + BatchNorm2d(planes * 4, momentum=bn_mom), + ) + + self.down4 = nn.Sequential( + nn.Conv2d(highres_planes, planes * 4, kernel_size=3, stride=2, padding=1, bias=False), + BatchNorm2d(planes * 4, momentum=bn_mom), + nn.ReLU(inplace=True), + nn.Conv2d(planes * 4, planes * 8, kernel_size=3, stride=2, padding=1, bias=False), + BatchNorm2d(planes * 8, momentum=bn_mom), + ) + + self.layer3_ = self._make_layer(block, planes * 2, highres_planes, 2) + + self.layer4_ = self._make_layer(block, highres_planes, highres_planes, 2) + + self.layer5_ = self._make_layer(Bottleneck, highres_planes, highres_planes, 1) + + self.layer5 = self._make_layer(Bottleneck, planes * 8, planes * 8, 1, stride=2) + + self.spp = 
DAPPM(planes * 16, spp_planes, planes * 4) + + if self.augment: + self.seghead_extra = segmenthead(highres_planes, head_planes, num_classes) + + self.final_layer = segmenthead(planes * 4, head_planes, num_classes, scale_factor=4) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') + elif isinstance(m, BatchNorm2d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + + def _make_layer(self, block, inplanes, planes, blocks, stride=1): + downsample = None + if stride != 1 or inplanes != planes * block.expansion: + downsample = nn.Sequential( + nn.Conv2d(inplanes, planes * block.expansion, + kernel_size=1, stride=stride, bias=False), + nn.BatchNorm2d(planes * block.expansion, momentum=bn_mom), + ) + + layers = [] + layers.append(block(inplanes, planes, stride, downsample)) + inplanes = planes * block.expansion + for i in range(1, blocks): + if i == (blocks - 1): + layers.append(block(inplanes, planes, stride=1, no_relu=True)) + else: + layers.append(block(inplanes, planes, stride=1, no_relu=False)) + + return nn.Sequential(*layers) + + def forward(self, x): + + width_output = x.shape[-1] // 8 + height_output = x.shape[-2] // 8 + layers = [] + + x = self.conv1(x) + + x = self.layer1(x) + layers.append(x) + + x = self.layer2(self.relu(x)) + layers.append(x) + + x = self.layer3(self.relu(x)) + layers.append(x) + x_ = self.layer3_(self.relu(layers[1])) + + x = x + self.down3(self.relu(x_)) + x_ = x_ + F.interpolate( + self.compression3(self.relu(layers[2])), + size=[height_output, width_output], + mode='bilinear') + if self.augment: + temp = x_ + + x = self.layer4(self.relu(x)) + layers.append(x) + x_ = self.layer4_(self.relu(x_)) + + x = x + self.down4(self.relu(x_)) + x_ = x_ + F.interpolate( + self.compression4(self.relu(layers[3])), + size=[height_output, width_output], + mode='bilinear') + + x_ = self.layer5_(self.relu(x_)) + x = F.interpolate( + self.spp(self.layer5(self.relu(x))), + size=[height_output, width_output], + mode='bilinear') + + x_ = self.final_layer(x + x_) + + if self.augment: + x_extra = self.seghead_extra(temp) + return [x_, x_extra] + else: + return x_ + +class FeatureExtractor(BaseModel): + default_conf = { + "pretrained": True, + "input_dim": 3, + "output_dim": 128, # # of channels in output feature maps + "encoder": "resnet50", # torchvision net as string + "remove_stride_from_first_conv": False, + "num_downsample": None, # how many downsample block + "decoder_norm": "nn.BatchNorm2d", # normalization ind decoder blocks + "do_average_pooling": False, + "checkpointed": False, # whether to use gradient checkpointing + "architecture":"FPN" + } + mean = [0.485, 0.456, 0.406] + std = [0.229, 0.224, 0.225] + # self.fmodel=None + + def build_encoder(self, conf): + assert isinstance(conf.encoder, str) + if conf.pretrained: + assert conf.input_dim == 3 + + + # return encoder, layers + + + + def _init(self, conf): + # Preprocessing + self.register_buffer("mean_", torch.tensor(self.mean), persistent=False) + self.register_buffer("std_", torch.tensor(self.std), persistent=False) + + if conf.architecture=="DDRNet23s": + # Encoder + self.fmodel= DualResNet(BasicBlock, [2, 2, 2, 2], num_classes=conf.output_dim, planes=32, spp_planes=128, head_planes=64, augment=False) + else: + raise ValueError("DDRNet23s") + # elif conf.architecture=="Unet": + # self.fmodel = smp.FPN( + # encoder_name=conf.encoder, # choose encoder, e.g. 
mobilenet_v2 or efficientnet-b7 + # encoder_weights="imagenet", # use `imagenet` pre-trained weights for encoder initialization + # in_channels=conf.input_dim, # model input channels (1 for gray-scale images, 3 for RGB, etc.) + # classes=conf.output_dim, # model output channels (number of classes in your dataset) + # # upsampling=int(conf.upsampling), # optional, final output upsampling, default is 8 + # activation="relu" + # ) + + + def _forward(self, data): + image = data["image"] + image = (image - self.mean_[:, None, None]) / self.std_[:, None, None] + + output = self.fmodel(image) + # output = self.decoder(skip_features) + + pred = {"feature_maps": [output]} + return pred +if __name__ == '__main__': + model=FeatureExtractor() \ No newline at end of file diff --git a/models/map_encoder.py b/models/map_encoder.py new file mode 100644 index 0000000000000000000000000000000000000000..ec7d838122715ecd483e9eac5012ee481614d5b3 --- /dev/null +++ b/models/map_encoder.py @@ -0,0 +1,77 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. + +import torch +import torch.nn as nn + +from .base import BaseModel +from .feature_extractor import FeatureExtractor + + +class MapEncoder(BaseModel): + default_conf = { + "embedding_dim": "???", + "output_dim": None, + "num_classes": "???", + "backbone": "???", + "unary_prior": False, + } + + def _init(self, conf): + self.embeddings = torch.nn.ModuleDict( + { + k: torch.nn.Embedding(n + 1, conf.embedding_dim) + for k, n in conf.num_classes.items() + } + ) + #num_calsses:{'areas': 7, 'ways': 10, 'nodes': 33} + input_dim = len(conf.num_classes) * conf.embedding_dim + output_dim = conf.output_dim + if output_dim is None: + output_dim = conf.backbone.output_dim + if conf.unary_prior: + output_dim += 1 + if conf.backbone is None: + self.encoder = nn.Conv2d(input_dim, output_dim, 1) + elif conf.backbone == "simple": + self.encoder = nn.Sequential( + nn.Conv2d(input_dim, 128, 3, padding=1), + nn.ReLU(inplace=True), + nn.Conv2d(128, 128, 3, padding=1), + nn.ReLU(inplace=True), + nn.Conv2d(128, output_dim, 3, padding=1), + ) + else: + self.encoder = FeatureExtractor( + { + **conf.backbone, + "input_dim": input_dim, + "output_dim": output_dim, + } + ) + + def _forward(self, data): + # embeddings = [ + # self.embeddings[k](data["map"][:, i].cuda()) + # for i, k in enumerate(("areas", "ways", "nodes")) + # ] + embeddings = [ + self.embeddings[k](data["map"][:, i]) + for i, k in enumerate(("areas", "ways", "nodes")) + ] + # for i in embeddings: + # print("embeddings:",i.shape)#[6, 256, 256, 16] + embeddings = torch.cat(embeddings, dim=-1).permute(0, 3, 1, 2)#[6,48, 256, 256] + # print("Old",embeddings.shape) + pass + if isinstance(self.encoder, BaseModel): + # print("encoder BaseModel:") + features = self.encoder({"image": embeddings})["feature_maps"] + else: + # print("encoder NoBaseModel:") + features = [self.encoder(embeddings)] + pred = {} + if self.conf.unary_prior: + pred["log_prior"] = [f[:, -1] for f in features] + features = [f[:, :-1] for f in features] + pred["map_features"] = features + return pred diff --git a/models/map_encoder_single.py b/models/map_encoder_single.py new file mode 100644 index 0000000000000000000000000000000000000000..1ef157355dc2ca4a1544dddc9ba4a522a7ffa607 --- /dev/null +++ b/models/map_encoder_single.py @@ -0,0 +1,213 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. 
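+# Single-table variant of MapEncoder: instead of one embedding per semantic
+# layer, batch_process() below collapses the (areas, ways, nodes) index maps
+# into a single index map, offsetting ways by +33 and areas by +43 and keeping
+# the highest-priority non-zero class per pixel (nodes > ways > areas).
+# Accordingly, conf.num_classes is expected to hold a single "all" entry that
+# covers the combined index range.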
+ +import torch +import torch.nn as nn + +from .base import BaseModel +from .feature_extractor import FeatureExtractor +import numpy as np +# from .embeddings import AttentionWeightedEmbedding, + +import torch +import torch.nn as nn +import torch.nn.functional as F + +import torch.nn.functional as F + +class ImprovedAttentionEmbedding(nn.Module): + def __init__(self, num_embeddings, embedding_dim, weight_dim=1, dropout=0.1, weight_init='normal'): + super(ImprovedAttentionEmbedding, self).__init__() + self.embedding = nn.Embedding(num_embeddings, embedding_dim) + self.weight_dim = weight_dim + + # 可学习的权重矩阵 [num_embeddings, weight_dim] + if weight_init == 'normal': + self.weights = nn.Parameter(torch.randn(num_embeddings, weight_dim)) + elif weight_init == 'uniform': + self.weights = nn.Parameter(torch.rand(num_embeddings, weight_dim)) + else: + self.weights = nn.Parameter(torch.ones(num_embeddings, weight_dim)) + + self.weight_norm = nn.LayerNorm(weight_dim) + self.dropout = nn.Dropout(dropout) + + # L2正则化 + self.l2_reg = 1e-5 + + def forward(self, input): + embedded = self.embedding(input) # [batch, 256, 256, embedding_dim] + + # 获取权重,并进行归一化 + weight = self.weights[input] # [batch, 256, 256, weight_dim] + weight = self.weight_norm(weight) + weight = F.softmax(weight, dim=-1) + + # 对嵌入向量进行加权 + if self.weight_dim == 1: + weighted_embedded = embedded * weight # [batch, 256, 256, embedding_dim] + else: + weighted_embedded = embedded * weight.unsqueeze(-1) + + weighted_embedded = self.dropout(weighted_embedded) + + return weighted_embedded + + def get_l2_reg(self): + return self.l2_reg * (self.weights ** 2).sum() +class AttentionWeightedEmbedding(nn.Module): + def __init__(self, num_embeddings, embedding_dim): + super(AttentionWeightedEmbedding, self).__init__() + self.embedding = nn.Embedding(num_embeddings, embedding_dim) + self.query = nn.Parameter(torch.randn(embedding_dim)) # 可训练的查询向量 + self.softmax = nn.Softmax(dim=-1) + + def forward(self, input): + # 获取嵌入向量 + embedded = self.embedding(input) # Shape: [batch_size, sequence_length, embedding_dim] + + # 计算注意力得分 + attn_scores = torch.matmul(embedded, self.query) # Shape: [batch_size, sequence_length] + + # 归一化注意力得分以得到权重 + attn_weights = self.softmax(attn_scores).unsqueeze(-1) # Shape: [batch_size, sequence_length, 1] + + # 对嵌入向量应用权重 + weighted_embedded = embedded * attn_weights # Shape: [batch_size, sequence_length, embedding_dim] + + return weighted_embedded +class WeightedEmbedding(nn.Module): + def __init__(self, num_embeddings, embedding_dim): + super(WeightedEmbedding, self).__init__() + self.embedding = nn.Embedding(num_embeddings, embedding_dim) + # 可学习的权重矩阵 [num_embeddings, 1] + self.weights = nn.Parameter(torch.ones(num_embeddings, 1)) + + def forward(self, input): + embedded = self.embedding(input) # [batch, 256, 256, embedding_dim] + # 获取权重,并扩展维度以便进行广播运算 + weight = self.weights[input] # [batch, 256, 256, 1] + # 对嵌入向量进行按元素乘法 + weighted_embedded = embedded * weight # [batch, 256, 256, embedding_dim] + return weighted_embedded + +class MapEncoderSingle(BaseModel): + default_conf = { + "embedding_dim": "???", + "output_dim": None, + "num_classes": "???", + "backbone": "???", + "unary_prior": False, + "weighted_embedding": False + } + + def _init(self, conf): + if conf.weighted_embedding==False: + self.embeddings = torch.nn.ModuleDict( + { + k: torch.nn.Embedding(n + 1, conf.embedding_dim) + for k, n in conf.num_classes.items() + } + ) + else: + if conf.weighted_embedding=="AttentionWeightedEmbedding": + self.embeddings = 
torch.nn.ModuleDict( + { + k: AttentionWeightedEmbedding(n + 1, conf.embedding_dim) + for k, n in conf.num_classes.items() + } + ) + elif conf.weighted_embedding=="WeightedEmbedding": + self.embeddings = torch.nn.ModuleDict( + { + k: WeightedEmbedding(n + 1, conf.embedding_dim) + for k, n in conf.num_classes.items() + } + ) + elif conf.weighted_embedding=="ImprovedAttentionEmbedding": + self.embeddings = torch.nn.ModuleDict( + { + k: ImprovedAttentionEmbedding(n + 1, conf.embedding_dim) + for k, n in conf.num_classes.items() + } + ) + else: + pass + + + #num_calsses:{'areas': 7, 'ways': 10, 'nodes': 33} + input_dim = len(conf.num_classes) * conf.embedding_dim + output_dim = conf.output_dim + if output_dim is None: + output_dim = conf.backbone.output_dim + if conf.unary_prior: + output_dim += 1 + if conf.backbone is None: + self.encoder = nn.Conv2d(input_dim, output_dim, 1) + elif conf.backbone == "simple": + self.encoder = nn.Sequential( + nn.Conv2d(input_dim, 128, 3, padding=1), + nn.ReLU(inplace=True), + nn.Conv2d(128, 128, 3, padding=1), + nn.ReLU(inplace=True), + nn.Conv2d(128, output_dim, 3, padding=1), + ) + else: + self.encoder = FeatureExtractor( + { + **conf.backbone, + "input_dim": input_dim, + "output_dim": output_dim, + } + ) + def batch_process(self,input_tensor): + # 获取输入张量的维度 + batch_size, dim1, dim2, dim3 = input_tensor.shape + + # 首先,我们需要对第一个索引为0的二维数组中的非零元素增加43 + input_tensor[:, 0, :, :] += torch.where(input_tensor[:, 0, :, :] != 0, 43, 0) + + # 接着,对第一个索引为1的二维数组中的非零元素增加33 + input_tensor[:, 1, :, :] += torch.where(input_tensor[:, 1, :, :] != 0, 33, 0) + + # 创建一个全零的输出张量 + output_tensor = torch.zeros((batch_size, dim2, dim3), dtype=input_tensor.dtype, device=input_tensor.device) + + # 找到输入张量中至少有一个非零值的位置 + nonzero_mask = torch.any(input_tensor != 0, dim=1) + + # 根据优先级赋值 + output_tensor[nonzero_mask] = input_tensor[:, 2, :, :][nonzero_mask] + output_tensor[nonzero_mask] = torch.where(input_tensor[:, 2, :, :][nonzero_mask] == 0, input_tensor[:, 1, :, :][nonzero_mask], output_tensor[nonzero_mask]) + output_tensor[nonzero_mask] = torch.where(torch.logical_and(input_tensor[:, 2, :, :][nonzero_mask] == 0, input_tensor[:, 1, :, :][nonzero_mask] == 0), input_tensor[:, 0, :, :][nonzero_mask], output_tensor[nonzero_mask]) + + return output_tensor + def _forward(self, data): + temp=data["map"] + temp=self.batch_process(temp) + # a=self.embeddings["all"] + # print("temp",temp.shape,data["map"].shape) + + # 找到tensor中的最大值 + # max_value = temp.max() + + # print("最大值是:", max_value.item()) + embeddings = self.embeddings["all"](temp)#shape:[batch,256,256,48] + + # print("embeddings.shape A",embeddings.shape) + embeddings =embeddings.permute(0, 3, 1, 2) + # print("embeddings.shape B",embeddings.shape) + # print("Single",embeddings.shape) + pass + if isinstance(self.encoder, BaseModel): + # print("encoder is BaseModel") + features = self.encoder({"image": embeddings})["feature_maps"] + else: + # print("encoder is not BaseModel") + features = [self.encoder(embeddings)] + pred = {} + if self.conf.unary_prior: + pred["log_prior"] = [f[:, -1] for f in features] + features = [f[:, :-1] for f in features] + pred["map_features"] = features#6,8,256,256 list of tensor ,shape:[6,8, 256, 256] + + return pred diff --git a/models/maplocnet.py b/models/maplocnet.py new file mode 100644 index 0000000000000000000000000000000000000000..23084359f5a51a8ba995ec8124a8588840b39adf --- /dev/null +++ b/models/maplocnet.py @@ -0,0 +1,260 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. 
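+# MapLocNet: encodes the aerial image and the rasterised OSM tile into feature
+# maps of matching channel depth, rotates the image features into a bank of
+# oriented templates, and exhaustively correlates them against the map features
+# to obtain a score volume over position and heading (u, v, yaw). loss()
+# minimises the negative log-likelihood of the ground-truth pose over this
+# volume.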
+ +import numpy as np +import torch +from torch.nn.functional import normalize + +from . import get_model +from models.base import BaseModel +from models.bev_net import BEVNet +from models.bev_projection import CartesianProjection, PolarProjectionDepth +from models.voting import ( + argmax_xyr,argmax_xyrh, + conv2d_fft_batchwise, + expectation_xyr, + log_softmax_spatial, + mask_yaw_prior, + nll_loss_xyr, + nll_loss_xyr_smoothed, + TemplateSampler, + UAVTemplateSampler, + UAVTemplateSamplerFast +) +import torch.nn.functional as F +from torch.nn.functional import grid_sample, log_softmax, pad +from .map_encoder import MapEncoder +from .map_encoder_single import MapEncoderSingle +from .metrics import AngleError, AngleRecall, Location2DError, Location2DRecall + + +class MapLocNet(BaseModel): + default_conf = { + "image_size": "???", + "val_citys":"???", + "image_encoder": "???", + "map_encoder": "???", + "bev_net": "???", + "latent_dim": "???", + "matching_dim": "???", + "scale_range": [0, 9], + "num_scale_bins": "???", + "z_min": None, + "z_max": "???", + "x_max": "???", + "pixel_per_meter": "???", + "num_rotations": "???", + "add_temperature": False, + "normalize_features": False, + "padding_matching": "replicate", + "apply_map_prior": True, + "do_label_smoothing": False, + "sigma_xy": 1, + "sigma_r": 2, + # depcreated + "depth_parameterization": "scale", + "norm_depth_scores": False, + "normalize_scores_by_dim": False, + "normalize_scores_by_num_valid": True, + "prior_renorm": True, + "retrieval_dim": None, + } + + def _init(self, conf): + assert not self.conf.norm_depth_scores + assert self.conf.depth_parameterization == "scale" + assert not self.conf.normalize_scores_by_dim + assert self.conf.normalize_scores_by_num_valid + assert self.conf.prior_renorm + # a=conf.image_encoder.get("name", "feature_extractor_v2") + # b=conf.image_encoder.get("name") + Encoder = get_model(conf.image_encoder.get("name")) + self.image_encoder = Encoder(conf.image_encoder.backbone) + + if len(conf.map_encoder.num_classes)==1: + self.map_encoder = MapEncoderSingle(conf.map_encoder) + else: + self.map_encoder = MapEncoder(conf.map_encoder) + # self.bev_net = None if conf.bev_net is None else BEVNet(conf.bev_net) + + ppm = conf.pixel_per_meter + # self.projection_polar = PolarProjectionDepth( + # conf.z_max, + # ppm, + # conf.scale_range, + # conf.z_min, + # ) + # self.projection_bev = CartesianProjection( + # conf.z_max, conf.x_max, ppm, conf.z_min + # ) + # self.template_sampler = TemplateSampler( + # self.projection_bev.grid_xz, ppm, conf.num_rotations + # ) + self.template_sampler = UAVTemplateSamplerFast(conf.num_rotations,w=conf.image_size//2) + # self.template_sampler = UAVTemplateSampler(conf.num_rotations) + # self.scale_classifier = torch.nn.Linear(conf.latent_dim, conf.num_scale_bins) + # if conf.bev_net is None: + # self.feature_projection = torch.nn.Linear( + # conf.latent_dim, conf.matching_dim + # ) + if conf.add_temperature: + temperature = torch.nn.Parameter(torch.tensor(0.0)) + self.register_parameter("temperature", temperature) + + def exhaustive_voting(self, f_bev, f_map): + if self.conf.normalize_features: + f_bev = normalize(f_bev, dim=1) + f_map = normalize(f_map, dim=1) + + # Build the templates and exhaustively match against the map. 
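+        # Rough shape of the matching below (sizes are illustrative): f_map is
+        # resampled to the matching resolution, the template sampler produces a
+        # [B, N_rot, C, h, w] bank of rotated copies of the image features, and
+        # conv2d_fft_batchwise correlates every template with the map features
+        # in the Fourier domain, yielding scores of shape [B, N_rot, H, W] where
+        # scores[b, r, i, j] rates the image rotated by the r-th angle placed at
+        # map location (i, j).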
+ # if confidence_bev is not None: + # f_bev = f_bev * confidence_bev.unsqueeze(1) + # f_bev = f_bev.masked_fill(~valid_bev.unsqueeze(1), 0.0) + # torch.save(f_bev, 'f_bev.pt') + # torch.save(f_map, 'f_map.pt') + f_map = F.interpolate(f_map, size=(256, 256), mode='bilinear', align_corners=False) + + templates = self.template_sampler(f_bev)#[batch,256,8,129,129] + # torch.save(templates, 'templates.pt') + with torch.autocast("cuda", enabled=False): + scores = conv2d_fft_batchwise( + f_map.float(), + templates.float(), + padding_mode=self.conf.padding_matching, + ) + if self.conf.add_temperature: + scores = scores * torch.exp(self.temperature) + + # Reweight the different rotations based on the number of valid pixels + # in each template. Axis-aligned rotation have the maximum number of valid pixels. + # valid_templates = self.template_sampler(valid_bev.float()[None]) > (1 - 1e-4) + # num_valid = valid_templates.float().sum((-3, -2, -1)) + # scores = scores / num_valid[..., None, None] + return scores + + def _forward(self, data): + pred = {} + pred_map = pred["map"] = self.map_encoder(data) + f_map = pred_map["map_features"][0]#[batch,8,256,256] + + # Extract image features. + level = 0 + f_image = self.image_encoder(data)["feature_maps"][level]#[batch,128,128,176] + # print("f_map:",f_map.shape) + + scores = self.exhaustive_voting(f_image, f_map)#f_bev:[batch,8,64,129] f_map:[batch,8,256,256] confidence:[1,64,129] + scores = scores.moveaxis(1, -1) # B,H,W,N + if "log_prior" in pred_map and self.conf.apply_map_prior: + scores = scores + pred_map["log_prior"][0].unsqueeze(-1) + # pred["scores_unmasked"] = scores.clone() + if "map_mask" in data: + scores.masked_fill_(~data["map_mask"][..., None], -np.inf) + if "yaw_prior" in data: + mask_yaw_prior(scores, data["yaw_prior"], self.conf.num_rotations) + log_probs = log_softmax_spatial(scores) + # torch.save(scores, 'scores.pt') + with torch.no_grad(): + uvr_max = argmax_xyr(scores).to(scores) + uvr_avg, _ = expectation_xyr(log_probs.exp()) + + return { + **pred, + "scores": scores, + "log_probs": log_probs, + "uvr_max": uvr_max, + "uv_max": uvr_max[..., :2], + "yaw_max": uvr_max[..., 2], + "uvr_expectation": uvr_avg, + "uv_expectation": uvr_avg[..., :2], + "yaw_expectation": uvr_avg[..., 2], + "features_image": f_image, + } + def _forward_scale(self, data,resize=None): + pred = {} + pred_map = pred["map"] = self.map_encoder(data) + f_map = pred_map["map_features"][0]#[batch,8,256,256] + + # Extract image features. 
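+        # This method repeats the matching for several resized copies of the
+        # image features (the sizes in `resize`, supplied by the caller) to
+        # search over an unknown ground resolution. The per-size score volumes
+        # are stacked along a new last dimension and normalised jointly over
+        # location, rotation and scale, and argmax_xyrh returns the best
+        # (u, v, yaw, scale-index).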
+ level = 0 + f_image = self.image_encoder(data)["feature_maps"][level]#[batch,128,128,176] + # print("f_map:",f_map.shape) + scores_list = [] + + for resize_size in resize: + f_image_re = torch.nn.functional.interpolate(f_image, size=resize_size, mode='bilinear', align_corners=False) + scores = self.exhaustive_voting(f_image_re, f_map)#f_bev:[batch,8,64,129] f_map:[batch,8,256,256] confidence:[1,64,129] + scores = scores.moveaxis(1, -1) # B,H,W,N + scores_list.append(scores) + scores_list = torch.stack(scores_list, dim=-1) + log_probs_list = log_softmax(scores_list.flatten(-4), dim=-1).reshape(scores_list.shape) + + + # if "log_prior" in pred_map and self.conf.apply_map_prior: + # scores = scores + pred_map["log_prior"][0].unsqueeze(-1) + # # pred["scores_unmasked"] = scores.clone() + # if "map_mask" in data: + # scores.masked_fill_(~data["map_mask"][..., None], -np.inf) + # if "yaw_prior" in data: + # mask_yaw_prior(scores, data["yaw_prior"], self.conf.num_rotations) + #scores shape:[batch,W,H,64] + + # log_probs = log_softmax_spatial(scores) + # torch.save(scores, 'scores.pt') + with torch.no_grad(): + uvr_max = argmax_xyrh(scores_list) + # uvr_avg, _ = expectation_xyr(log_probs_list.exp()) + uvr_avg= uvr_max + + return { + **pred, + "scores": scores, + "log_probs": log_probs_list, + "uvr_max": uvr_max, + "uv_max": uvr_max[..., :2], + "yaw_max": uvr_max[..., 2], + "uvr_expectation": uvr_avg, + "uv_expectation": uvr_avg[..., :2], + "yaw_expectation": uvr_avg[..., 2], + "features_image": f_image, + } + def loss(self, pred, data): + xy_gt = data["uv"] + yaw_gt = data["roll_pitch_yaw"][..., -1] + if self.conf.do_label_smoothing: + nll = nll_loss_xyr_smoothed( + pred["log_probs"], + xy_gt, + yaw_gt, + self.conf.sigma_xy / self.conf.pixel_per_meter, + self.conf.sigma_r, + mask=data.get("map_mask"), + ) + else: + nll = nll_loss_xyr(pred["log_probs"], xy_gt, yaw_gt) + loss = {"total": nll, "nll": nll} + if self.training and self.conf.add_temperature: + loss["temperature"] = self.temperature.expand(len(nll)) + return loss + + def metrics(self): + return { + "xy_max_error": Location2DError("uv_max", self.conf.pixel_per_meter), + "xy_expectation_error": Location2DError( + "uv_expectation", self.conf.pixel_per_meter + ), + "yaw_max_error": AngleError("yaw_max"), + "xy_recall_1m": Location2DRecall(1.0, self.conf.pixel_per_meter, "uv_max"), + "xy_recall_3m": Location2DRecall(3.0, self.conf.pixel_per_meter, "uv_max"), + "xy_recall_5m": Location2DRecall(5.0, self.conf.pixel_per_meter, "uv_max"), + + # "x_recall_1m": Location2DRecall(1.0, self.conf.pixel_per_meter, "uv_max"), + # "x_recall_3m": Location2DRecall(3.0, self.conf.pixel_per_meter, "uv_max"), + # "x_recall_5m": Location2DRecall(5.0, self.conf.pixel_per_meter, "uv_max"), + # + # "y_recall_1m": Location2DRecall(1.0, self.conf.pixel_per_meter, "uv_max"), + # "y_recall_3m": Location2DRecall(3.0, self.conf.pixel_per_meter, "uv_max"), + # "y_recall_5m": Location2DRecall(5.0, self.conf.pixel_per_meter, "uv_max"), + + "yaw_recall_1°": AngleRecall(1.0, "yaw_max"), + "yaw_recall_3°": AngleRecall(3.0, "yaw_max"), + "yaw_recall_5°": AngleRecall(5.0, "yaw_max"), + } diff --git a/models/metrics.py b/models/metrics.py new file mode 100644 index 0000000000000000000000000000000000000000..b50a724f4719853b476d693db25ddbba562a3a51 --- /dev/null +++ b/models/metrics.py @@ -0,0 +1,118 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. 
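+# Evaluation metrics. Position errors are computed in pixels and divided by
+# pixel_per_meter to report metres; angle errors are wrapped so they lie in
+# [0, 180] degrees. The *Recall metrics accumulate the fraction of samples
+# whose error is below a fixed threshold (e.g. 1, 3 or 5 metres / degrees).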
+ +import torch +import torchmetrics +from torchmetrics.utilities.data import dim_zero_cat + +from .utils import deg2rad, rotmat2d + + +def location_error(uv, uv_gt, ppm=1): + return torch.norm(uv - uv_gt.to(uv), dim=-1) / ppm + +def location_error_single(uv, uv_gt, ppm=1): + return torch.norm(uv - uv_gt.to(uv), dim=-1) / ppm + +def angle_error(t, t_gt): + error = torch.abs(t % 360 - t_gt.to(t) % 360) + error = torch.minimum(error, 360 - error) + return error + + +class Location2DRecall(torchmetrics.MeanMetric): + def __init__(self, threshold, pixel_per_meter, key="uv_max", *args, **kwargs): + self.threshold = threshold + self.ppm = pixel_per_meter + self.key = key + super().__init__(*args, **kwargs) + + def update(self, pred, data): + self.cuda() + error = location_error(pred[self.key], data["uv"], self.ppm) + # print(error,self.threshold) + super().update((error <= torch.tensor(self.threshold,device=error.device)).float()) + +class Location1DRecall(torchmetrics.MeanMetric): + def __init__(self, threshold, pixel_per_meter, key="uv_max", *args, **kwargs): + self.threshold = threshold + self.ppm = pixel_per_meter + self.key = key + super().__init__(*args, **kwargs) + + def update(self, pred, data): + self.cuda() + error = location_error(pred[self.key], data["uv"], self.ppm) + # print(error,self.threshold) + super().update((error <= torch.tensor(self.threshold,device=error.device)).float()) +class AngleRecall(torchmetrics.MeanMetric): + def __init__(self, threshold, key="yaw_max", *args, **kwargs): + self.threshold = threshold + self.key = key + + super().__init__(*args, **kwargs) + + def update(self, pred, data): + self.cuda() + error = angle_error(pred[self.key], data["roll_pitch_yaw"][..., -1]) + super().update((error <= self.threshold).float()) + + +class MeanMetricWithRecall(torchmetrics.Metric): + full_state_update = True + + def __init__(self): + super().__init__() + self.add_state("value", default=[], dist_reduce_fx="cat") + def compute(self): + return dim_zero_cat(self.value).mean(0) + + def get_errors(self): + return dim_zero_cat(self.value) + + def recall(self, thresholds): + self.cuda() + error = self.get_errors() + thresholds = error.new_tensor(thresholds) + return (error.unsqueeze(-1) < thresholds).float().mean(0) * 100 + + +class AngleError(MeanMetricWithRecall): + def __init__(self, key): + super().__init__() + self.key = key + + def update(self, pred, data): + self.cuda() + value = angle_error(pred[self.key], data["roll_pitch_yaw"][..., -1]) + if value.numel(): + self.value.append(value) + + +class Location2DError(MeanMetricWithRecall): + def __init__(self, key, pixel_per_meter): + super().__init__() + self.key = key + self.ppm = pixel_per_meter + + def update(self, pred, data): + self.cuda() + value = location_error(pred[self.key], data["uv"], self.ppm) + if value.numel(): + self.value.append(value) + + +class LateralLongitudinalError(MeanMetricWithRecall): + def __init__(self, pixel_per_meter, key="uv_max"): + super().__init__() + self.ppm = pixel_per_meter + self.key = key + + def update(self, pred, data): + self.cuda() + yaw = deg2rad(data["roll_pitch_yaw"][..., -1]) + shift = (pred[self.key] - data["uv"]) * yaw.new_tensor([-1, 1]) + shift = (rotmat2d(yaw) @ shift.unsqueeze(-1)).squeeze(-1) + error = torch.abs(shift) / self.ppm + value = error.view(-1, 2) + if value.numel(): + self.value.append(value) diff --git a/models/orienternet.py b/models/orienternet.py new file mode 100644 index 0000000000000000000000000000000000000000..9315f10afedc4d6cc49e4ba78e82d7ae8d510795 --- 
/dev/null +++ b/models/orienternet.py @@ -0,0 +1,204 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. + +import numpy as np +import torch +from torch.nn.functional import normalize + +from . import get_model +from models.base import BaseModel +from models.bev_net import BEVNet +from models.bev_projection import CartesianProjection, PolarProjectionDepth +from models.voting import ( + argmax_xyr, + conv2d_fft_batchwise, + expectation_xyr, + log_softmax_spatial, + mask_yaw_prior, + nll_loss_xyr, + nll_loss_xyr_smoothed, + TemplateSampler, + UAVTemplateSampler, + UAVTemplateSamplerFast +) +from .map_encoder import MapEncoder +from .metrics import AngleError, AngleRecall, Location2DError, Location2DRecall + + +class MapLocNet(BaseModel): + default_conf = { + "image_size": "???", + "val_citys":"???", + "image_encoder": "???", + "map_encoder": "???", + "bev_net": "???", + "latent_dim": "???", + "matching_dim": "???", + "scale_range": [0, 9], + "num_scale_bins": "???", + "z_min": None, + "z_max": "???", + "x_max": "???", + "pixel_per_meter": "???", + "num_rotations": "???", + "add_temperature": False, + "normalize_features": False, + "padding_matching": "replicate", + "apply_map_prior": True, + "do_label_smoothing": False, + "sigma_xy": 1, + "sigma_r": 2, + # depcreated + "depth_parameterization": "scale", + "norm_depth_scores": False, + "normalize_scores_by_dim": False, + "normalize_scores_by_num_valid": True, + "prior_renorm": True, + "retrieval_dim": None, + } + + def _init(self, conf): + assert not self.conf.norm_depth_scores + assert self.conf.depth_parameterization == "scale" + assert not self.conf.normalize_scores_by_dim + assert self.conf.normalize_scores_by_num_valid + assert self.conf.prior_renorm + + Encoder = get_model(conf.image_encoder.get("name", "feature_extractor_v2")) + self.image_encoder = Encoder(conf.image_encoder.backbone) + self.map_encoder = MapEncoder(conf.map_encoder) + # self.bev_net = None if conf.bev_net is None else BEVNet(conf.bev_net) + + ppm = conf.pixel_per_meter + # self.projection_polar = PolarProjectionDepth( + # conf.z_max, + # ppm, + # conf.scale_range, + # conf.z_min, + # ) + # self.projection_bev = CartesianProjection( + # conf.z_max, conf.x_max, ppm, conf.z_min + # ) + # self.template_sampler = TemplateSampler( + # self.projection_bev.grid_xz, ppm, conf.num_rotations + # ) + self.template_sampler = UAVTemplateSamplerFast(conf.num_rotations,w=conf.image_size//2) + # self.template_sampler = UAVTemplateSampler(conf.num_rotations) + # self.scale_classifier = torch.nn.Linear(conf.latent_dim, conf.num_scale_bins) + # if conf.bev_net is None: + # self.feature_projection = torch.nn.Linear( + # conf.latent_dim, conf.matching_dim + # ) + if conf.add_temperature: + temperature = torch.nn.Parameter(torch.tensor(0.0)) + self.register_parameter("temperature", temperature) + + def exhaustive_voting(self, f_bev, f_map): + if self.conf.normalize_features: + f_bev = normalize(f_bev, dim=1) + f_map = normalize(f_map, dim=1) + + # Build the templates and exhaustively match against the map. 
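+        # The correlation is done in the Fourier domain (one rFFT of the map
+        # features and of the zero-padded templates, a per-channel product, and
+        # an inverse rFFT), which avoids sliding every rotated template over the
+        # map with an explicit spatial convolution.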
+ # if confidence_bev is not None: + # f_bev = f_bev * confidence_bev.unsqueeze(1) + # f_bev = f_bev.masked_fill(~valid_bev.unsqueeze(1), 0.0) + # torch.save(f_bev, 'f_bev.pt') + # torch.save(f_map, 'f_map.pt') + + templates = self.template_sampler(f_bev)#[batch,256,8,129,129] + # torch.save(templates, 'templates.pt') + with torch.autocast("cuda", enabled=False): + scores = conv2d_fft_batchwise( + f_map.float(), + templates.float(), + padding_mode=self.conf.padding_matching, + ) + if self.conf.add_temperature: + scores = scores * torch.exp(self.temperature) + + # Reweight the different rotations based on the number of valid pixels + # in each template. Axis-aligned rotation have the maximum number of valid pixels. + # valid_templates = self.template_sampler(valid_bev.float()[None]) > (1 - 1e-4) + # num_valid = valid_templates.float().sum((-3, -2, -1)) + # scores = scores / num_valid[..., None, None] + return scores + + def _forward(self, data): + pred = {} + pred_map = pred["map"] = self.map_encoder(data) + f_map = pred_map["map_features"][0]#[batch,8,256,256] + + # Extract image features. + level = 0 + f_image = self.image_encoder(data)["feature_maps"][level]#[batch,128,128,176] + # print("f_map:",f_map.shape) + + scores = self.exhaustive_voting(f_image, f_map)#f_bev:[batch,8,64,129] f_map:[batch,8,256,256] confidence:[1,64,129] + scores = scores.moveaxis(1, -1) # B,H,W,N + if "log_prior" in pred_map and self.conf.apply_map_prior: + scores = scores + pred_map["log_prior"][0].unsqueeze(-1) + # pred["scores_unmasked"] = scores.clone() + if "map_mask" in data: + scores.masked_fill_(~data["map_mask"][..., None], -np.inf) + if "yaw_prior" in data: + mask_yaw_prior(scores, data["yaw_prior"], self.conf.num_rotations) + log_probs = log_softmax_spatial(scores) + # torch.save(scores, 'scores.pt') + with torch.no_grad(): + uvr_max = argmax_xyr(scores).to(scores) + uvr_avg, _ = expectation_xyr(log_probs.exp()) + + return { + **pred, + "scores": scores, + "log_probs": log_probs, + "uvr_max": uvr_max, + "uv_max": uvr_max[..., :2], + "yaw_max": uvr_max[..., 2], + "uvr_expectation": uvr_avg, + "uv_expectation": uvr_avg[..., :2], + "yaw_expectation": uvr_avg[..., 2], + "features_image": f_image, + } + + def loss(self, pred, data): + xy_gt = data["uv"] + yaw_gt = data["roll_pitch_yaw"][..., -1] + if self.conf.do_label_smoothing: + nll = nll_loss_xyr_smoothed( + pred["log_probs"], + xy_gt, + yaw_gt, + self.conf.sigma_xy / self.conf.pixel_per_meter, + self.conf.sigma_r, + mask=data.get("map_mask"), + ) + else: + nll = nll_loss_xyr(pred["log_probs"], xy_gt, yaw_gt) + loss = {"total": nll, "nll": nll} + if self.training and self.conf.add_temperature: + loss["temperature"] = self.temperature.expand(len(nll)) + return loss + + def metrics(self): + return { + "xy_max_error": Location2DError("uv_max", self.conf.pixel_per_meter), + "xy_expectation_error": Location2DError( + "uv_expectation", self.conf.pixel_per_meter + ), + "yaw_max_error": AngleError("yaw_max"), + "xy_recall_1m": Location2DRecall(1.0, self.conf.pixel_per_meter, "uv_max"), + "xy_recall_3m": Location2DRecall(3.0, self.conf.pixel_per_meter, "uv_max"), + "xy_recall_5m": Location2DRecall(5.0, self.conf.pixel_per_meter, "uv_max"), + + # "x_recall_1m": Location2DRecall(1.0, self.conf.pixel_per_meter, "uv_max"), + # "x_recall_3m": Location2DRecall(3.0, self.conf.pixel_per_meter, "uv_max"), + # "x_recall_5m": Location2DRecall(5.0, self.conf.pixel_per_meter, "uv_max"), + # + # "y_recall_1m": Location2DRecall(1.0, self.conf.pixel_per_meter, "uv_max"), + # 
"y_recall_3m": Location2DRecall(3.0, self.conf.pixel_per_meter, "uv_max"), + # "y_recall_5m": Location2DRecall(5.0, self.conf.pixel_per_meter, "uv_max"), + + "yaw_recall_1°": AngleRecall(1.0, "yaw_max"), + "yaw_recall_3°": AngleRecall(3.0, "yaw_max"), + "yaw_recall_5°": AngleRecall(5.0, "yaw_max"), + } diff --git a/models/sequential.py b/models/sequential.py new file mode 100644 index 0000000000000000000000000000000000000000..606be3fe7a1861c98c178f446ed8774a4755ce77 --- /dev/null +++ b/models/sequential.py @@ -0,0 +1,218 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. + +import numpy as np +import torch + +from .voting import argmax_xyr, log_softmax_spatial, sample_xyr +from .utils import deg2rad, make_grid, rotmat2d + + +def log_gaussian(points, mean, sigma): + return -1 / 2 * torch.sum((points - mean) ** 2, -1) / sigma**2 + + +def log_laplace(points, mean, sigma): + return -torch.sum(torch.abs(points - mean), -1) / sigma + + +def propagate_belief( + Δ_xy, Δ_yaw, canvas_target, canvas_source, belief, num_rotations=None +): + # We allow a different sampling resolution in the target frame + if num_rotations is None: + num_rotations = belief.shape[-1] + + angles = torch.arange( + 0, 360, 360 / num_rotations, device=Δ_xy.device, dtype=Δ_xy.dtype + ) + uv_grid = make_grid(canvas_target.w, canvas_target.h, device=Δ_xy.device) + xy_grid = canvas_target.to_xy(uv_grid.to(Δ_xy)) + + Δ_xy_world = torch.einsum("nij,j->ni", rotmat2d(deg2rad(-angles)), Δ_xy) + xy_grid_prev = xy_grid[..., None, :] + Δ_xy_world[..., None, None, :, :] + uv_grid_prev = canvas_source.to_uv(xy_grid_prev).to(Δ_xy) + + angles_prev = angles + Δ_yaw + angles_grid_prev = angles_prev.tile((canvas_target.h, canvas_target.w, 1)) + + prior, valid = sample_xyr( + belief[None, None], + uv_grid_prev.to(belief)[None], + angles_grid_prev.to(belief)[None], + nearest_for_inf=True, + ) + return prior, valid + + +def markov_filtering(observations, canvas, xys, yaws, idxs=None): + assert len(observations) == len(canvas) == len(xys) == len(yaws) + if idxs is None: + idxs = range(len(observations)) + belief = None + beliefs = [] + for i in idxs: + obs = observations[i] + if belief is None: + belief = obs + else: + Δ_xy = rotmat2d(deg2rad(yaws[i])) @ (xys[i - 1] - xys[i]) + Δ_yaw = yaws[i - 1] - yaws[i] + prior, valid = propagate_belief( + Δ_xy, Δ_yaw, canvas[i], canvas[i - 1], belief + ) + prior = prior[0, 0].masked_fill_(~valid[0], -np.inf) + belief = prior + obs + belief = log_softmax_spatial(belief) + beliefs.append(belief) + uvt_seq = torch.stack([argmax_xyr(p) for p in beliefs]) + return beliefs, uvt_seq + + +def integrate_observation( + source, + target, + xy_source, + xy_target, + yaw_source, + yaw_target, + canvas_source, + canvas_target, + **kwargs +): + Δ_xy = rotmat2d(deg2rad(yaw_target)) @ (xy_source - xy_target) + Δ_yaw = yaw_source - yaw_target + prior, valid = propagate_belief( + Δ_xy, Δ_yaw, canvas_target, canvas_source, source, **kwargs + ) + prior = prior[0, 0].masked_fill_(~valid[0], -np.inf) + target.add_(prior) + target.sub_(target.max()) # normalize to avoid overflow + return prior + + +class RigidAligner: + def __init__( + self, + canvas_ref=None, + xy_ref=None, + yaw_ref=None, + num_rotations=None, + track_priors=False, + ): + self.canvas = canvas_ref + self.xy_ref = xy_ref + self.yaw_ref = yaw_ref + self.rotmat_ref = None + self.num_rotations = num_rotations + self.belief = None + self.priors = [] if track_priors else None + + self.yaw_slam2geo = None + self.Rt_slam2geo = None + + def update(self, observation, 
canvas, xy, yaw): + # initialization + if self.canvas is None: + self.canvas = canvas + if self.xy_ref is None: + self.xy_ref = xy + self.yaw_ref = yaw + self.rotmat_ref = rotmat2d(deg2rad(self.yaw_ref)) + if self.num_rotations is None: + self.num_rotations = observation.shape[-1] + if self.belief is None: + self.belief = observation.new_zeros( + (self.canvas.h, self.canvas.w, self.num_rotations) + ) + + prior = integrate_observation( + observation, + self.belief, + xy, + self.xy_ref, + yaw, + self.yaw_ref, + canvas, + self.canvas, + num_rotations=self.num_rotations, + ) + + if self.priors is not None: + self.priors.append(prior.cpu()) + return prior + + def update_with_ref(self, observation, canvas, xy, yaw): + if self.belief is not None: + observation = observation.clone() + integrate_observation( + self.belief, + observation, + self.xy_ref, + xy, + self.yaw_ref, + yaw, + self.canvas, + canvas, + num_rotations=observation.shape[-1], + ) + + self.belief = observation + self.canvas = canvas + self.xy_ref = xy + self.yaw_ref = yaw + + def compute(self): + uvt_align_ref = argmax_xyr(self.belief) + self.yaw_ref_align = uvt_align_ref[-1] + self.xy_ref_align = self.canvas.to_xy(uvt_align_ref[:2].double()) + + self.yaw_slam2geo = self.yaw_ref - self.yaw_ref_align + R_slam2geo = rotmat2d(deg2rad(self.yaw_slam2geo)) + t_slam2geo = self.xy_ref_align - R_slam2geo @ self.xy_ref + self.Rt_slam2geo = (R_slam2geo, t_slam2geo) + + def transform(self, xy, yaw): + if self.Rt_slam2geo is None or self.yaw_slam2geo is None: + raise ValueError("Missing transformation, call `compute()` first!") + xy_geo = self.Rt_slam2geo[1].to(xy) + xy @ self.Rt_slam2geo[0].T.to(xy) + return xy_geo, (self.yaw_slam2geo.to(yaw) + yaw) % 360 + + +class GPSAligner(RigidAligner): + def __init__(self, distribution=log_laplace, **kwargs): + self.distribution = distribution + super().__init__(**kwargs) + if self.num_rotations is None: + raise ValueError("Rotation number is required.") + angles = torch.arange(0, 360, 360 / self.num_rotations) + self.rotmats = rotmat2d(deg2rad(-angles)) + self.xy_grid = None + + def update(self, xy_gps, accuracy, canvas, xy, yaw): + # initialization + if self.canvas is None: + self.canvas = canvas + if self.xy_ref is None: + self.xy_ref = xy + self.yaw_ref = yaw + self.rotmat_ref = rotmat2d(deg2rad(self.yaw_ref)) + if self.xy_grid is None: + self.xy_grid = self.canvas.to_xy(make_grid(self.canvas.w, self.canvas.h)) + if self.belief is None: + self.belief = xy_gps.new_zeros( + (self.canvas.h, self.canvas.w, self.num_rotations) + ) + + # integration + Δ_xy = self.rotmat_ref @ (xy - self.xy_ref) + Δ_xy_world = torch.einsum("nij,j->ni", self.rotmats.to(xy), Δ_xy) + xy_grid_prev = ( + self.xy_grid.to(xy)[..., None, :] + Δ_xy_world[..., None, None, :, :] + ) + prior = self.distribution(xy_grid_prev, xy_gps, accuracy) + self.belief.add_(prior) + self.belief.sub_(self.belief.max()) # normalize to avoid overflow + + if self.priors is not None: + self.priors.append(prior.cpu()) + return prior diff --git a/models/utils.py b/models/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..ec246b7d0adf04cc9307475867650523f67a5063 --- /dev/null +++ b/models/utils.py @@ -0,0 +1,87 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. 
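+# Small geometry helpers shared by the models. Conventions (illustrative):
+# rotmat2d(a) is the counter-clockwise rotation [[cos a, -sin a], [sin a, cos a]],
+# so rotmat2d(deg2rad(torch.tensor(90.0))) maps (1, 0) to roughly (0, 1), and
+# make_grid(w, h) returns an (h, w, 2) grid of integer-valued (x, y) coordinates.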
+ +import math +from typing import Optional + +import torch + + +def checkpointed(cls, do=True): + """Adapted from the DISK implementation of Michał Tyszkiewicz.""" + assert issubclass(cls, torch.nn.Module) + + class Checkpointed(cls): + def forward(self, *args, **kwargs): + super_fwd = super(Checkpointed, self).forward + if any((torch.is_tensor(a) and a.requires_grad) for a in args): + return torch.utils.checkpoint.checkpoint(super_fwd, *args, **kwargs) + else: + return super_fwd(*args, **kwargs) + + return Checkpointed if do else cls + + +class GlobalPooling(torch.nn.Module): + def __init__(self, kind): + super().__init__() + if kind == "mean": + self.fn = torch.nn.Sequential( + torch.nn.Flatten(2), torch.nn.AdaptiveAvgPool1d(1), torch.nn.Flatten() + ) + elif kind == "max": + self.fn = torch.nn.Sequential( + torch.nn.Flatten(2), torch.nn.AdaptiveMaxPool1d(1), torch.nn.Flatten() + ) + else: + raise ValueError(f"Unknown pooling type {kind}.") + + def forward(self, x): + return self.fn(x) + + +@torch.jit.script +def make_grid( + w: float, + h: float, + step_x: float = 1.0, + step_y: float = 1.0, + orig_x: float = 0, + orig_y: float = 0, + y_up: bool = False, + device: Optional[torch.device] = None, +) -> torch.Tensor: + x, y = torch.meshgrid( + [ + torch.arange(orig_x, w + orig_x, step_x, device=device), + torch.arange(orig_y, h + orig_y, step_y, device=device), + ], + indexing="xy", + ) + if y_up: + y = y.flip(-2) + grid = torch.stack((x, y), -1) + return grid + + +@torch.jit.script +def rotmat2d(angle: torch.Tensor) -> torch.Tensor: + c = torch.cos(angle) + s = torch.sin(angle) + R = torch.stack([c, -s, s, c], -1).reshape(angle.shape + (2, 2)) + return R + + +@torch.jit.script +def rotmat2d_grad(angle: torch.Tensor) -> torch.Tensor: + c = torch.cos(angle) + s = torch.sin(angle) + R = torch.stack([-s, -c, c, -s], -1).reshape(angle.shape + (2, 2)) + return R + + +def deg2rad(x): + return x * math.pi / 180 + + +def rad2deg(x): + return x * 180 / math.pi diff --git a/models/voting.py b/models/voting.py new file mode 100644 index 0000000000000000000000000000000000000000..5a4295649a80edb5610af8d8abff5993f2ea4c06 --- /dev/null +++ b/models/voting.py @@ -0,0 +1,387 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. 
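+# Pose-voting utilities: template samplers that rotate image features into a
+# bank of oriented kernels, FFT-based batched correlation against the map
+# features (conv2d_fft_batchwise), and helpers to normalise the resulting score
+# volume (log_softmax_spatial), extract the argmax or expectation over
+# (x, y, yaw), and evaluate the NLL of a ground-truth pose.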
+ +from typing import Optional, Tuple + +import numpy as np +import torch +from torch.fft import irfftn, rfftn +from torch.nn.functional import grid_sample, log_softmax, pad + +from .metrics import angle_error +from .utils import make_grid, rotmat2d +from torchvision.transforms.functional import rotate + +class UAVTemplateSamplerFast(torch.nn.Module): + def __init__(self, num_rotations,w=128,optimize=True): + super().__init__() + + h, w = w,w + grid_xy = make_grid( + w=w, + h=h, + step_x=1, + step_y=1, + orig_y=-h//2, + orig_x=-h//2, + y_up=True, + ).cuda() + + if optimize: + assert (num_rotations % 4) == 0 + angles = torch.arange( + 0, 90, 90 / (num_rotations // 4) + ).cuda() + else: + angles = torch.arange( + 0, 360, 360 / num_rotations, device=grid_xz_bev.device + ) + rotmats = rotmat2d(angles / 180 * np.pi) + grid_xy_rot = torch.einsum("...nij,...hwj->...nhwi", rotmats, grid_xy) + + grid_ij_rot = (grid_xy_rot - grid_xy[..., :1, :1, :]) * grid_xy.new_tensor( + [1, -1] + ) + grid_ij_rot = grid_ij_rot + grid_norm = (grid_ij_rot + 0.5) / grid_ij_rot.new_tensor([w, h]) * 2 - 1 + + self.optimize = optimize + self.num_rots = num_rotations + self.register_buffer("angles", angles, persistent=False) + self.register_buffer("grid_norm", grid_norm, persistent=False) + + def forward(self, image_bev): + grid = self.grid_norm + b, c = image_bev.shape[:2] + n, h, w = grid.shape[:3] + grid = grid[None].repeat_interleave(b, 0).reshape(b * n, h, w, 2) + image = ( + image_bev[:, None] + .repeat_interleave(n, 1) + .reshape(b * n, *image_bev.shape[1:]) + ) + # print(image.shape,grid.shape,self.grid_norm.shape) + kernels = grid_sample(image, grid.to(image.dtype), align_corners=False).reshape( + b, n, c, h, w + ) + + if self.optimize: # we have computed only the first quadrant + kernels_quad234 = [torch.rot90(kernels, -i, (-2, -1)) for i in (1, 2, 3)] + kernels = torch.cat([kernels] + kernels_quad234, 1) + + return kernels +class UAVTemplateSampler(torch.nn.Module): + def __init__(self, num_rotations): + super().__init__() + + self.num_rotations = num_rotations + + def Template(self, input_features): + # 角度数量 + num_angles = self.num_rotations + # 扩展第二个维度为旋转角度数量 + input_shape = torch.tensor(input_features.shape) + output_shape = torch.cat((input_shape[:1], torch.tensor([num_angles]), input_shape[1:])).tolist() + expanded_features = torch.zeros(output_shape,device=input_features.device) + + # 生成旋转角度序列 + rotation_angles = torch.linspace(360, 0, 64 + 1)[:-1] + # rotation_angles=torch.flip(rotation_angles, dims=[0]) + # 对扩展后的特征应用不同的旋转角度 + rotated_features = [] + # print(len(rotation_angles)) + for i in range(len(rotation_angles)): + # print(rotation_angles[i].item()) + rotated_feature = rotate(input_features, rotation_angles[i].item(), fill=0) + expanded_features[:, i, :, :, :] = rotated_feature + + # 将所有旋转后的特征堆叠起来形成最终的输出向量 + # output_features = torch.stack(rotated_features, dim=1) + + # 输出向量的维度 + # output_size = [3, num_angles, 8, 128, 128] + return expanded_features # 输出调试信息,验证输出向量的维度是否正确 + def forward(self, image_bev): + + kernels=self.Template(image_bev) + + return kernels +class TemplateSampler(torch.nn.Module): + def __init__(self, grid_xz_bev, ppm, num_rotations, optimize=True): + super().__init__() + + Δ = 1 / ppm + h, w = grid_xz_bev.shape[:2] + ksize = max(w, h * 2 + 1) + radius = ksize * Δ + grid_xy = make_grid( + radius, + radius, + step_x=Δ, + step_y=Δ, + orig_y=(Δ - radius) / 2, + orig_x=(Δ - radius) / 2, + y_up=True, + ) + + if optimize: + assert (num_rotations % 4) == 0 + angles = torch.arange( + 0, 
90, 90 / (num_rotations // 4), device=grid_xz_bev.device + ) + else: + angles = torch.arange( + 0, 360, 360 / num_rotations, device=grid_xz_bev.device + ) + rotmats = rotmat2d(angles / 180 * np.pi) + grid_xy_rot = torch.einsum("...nij,...hwj->...nhwi", rotmats, grid_xy) + + grid_ij_rot = (grid_xy_rot - grid_xz_bev[..., :1, :1, :]) * grid_xy.new_tensor( + [1, -1] + ) + grid_ij_rot = grid_ij_rot / Δ + grid_norm = (grid_ij_rot + 0.5) / grid_ij_rot.new_tensor([w, h]) * 2 - 1 + + self.optimize = optimize + self.num_rots = num_rotations + self.register_buffer("angles", angles, persistent=False) + self.register_buffer("grid_norm", grid_norm, persistent=False) + + def forward(self, image_bev): + grid = self.grid_norm + b, c = image_bev.shape[:2] + n, h, w = grid.shape[:3] + grid = grid[None].repeat_interleave(b, 0).reshape(b * n, h, w, 2) + image = ( + image_bev[:, None] + .repeat_interleave(n, 1) + .reshape(b * n, *image_bev.shape[1:]) + ) + kernels = grid_sample(image, grid.to(image.dtype), align_corners=False).reshape( + b, n, c, h, w + ) + + if self.optimize: # we have computed only the first quadrant + kernels_quad234 = [torch.rot90(kernels, -i, (-2, -1)) for i in (1, 2, 3)] + kernels = torch.cat([kernels] + kernels_quad234, 1) + + return kernels + + +def conv2d_fft_batchwise(signal, kernel, padding="same", padding_mode="constant"): + if padding == "same": + padding = [i // 2 for i in kernel.shape[-2:]] + padding_signal = [p for p in padding[::-1] for _ in range(2)] + signal = pad(signal, padding_signal, mode=padding_mode) + assert signal.size(-1) % 2 == 0 + + padding_kernel = [ + pad for i in [1, 2] for pad in [0, signal.size(-i) - kernel.size(-i)] + ] + kernel_padded = pad(kernel, padding_kernel) + + signal_fr = rfftn(signal, dim=(-1, -2)) + kernel_fr = rfftn(kernel_padded, dim=(-1, -2)) + + kernel_fr.imag *= -1 # flip the kernel + output_fr = torch.einsum("bc...,bdc...->bd...", signal_fr, kernel_fr) + output = irfftn(output_fr, dim=(-1, -2)) + + crop_slices = [slice(0, output.size(0)), slice(0, output.size(1))] + [ + slice(0, (signal.size(i) - kernel.size(i) + 1)) for i in [-2, -1] + ] + output = output[crop_slices].contiguous() + + return output + + +class SparseMapSampler(torch.nn.Module): + def __init__(self, num_rotations): + super().__init__() + angles = torch.arange(0, 360, 360 / self.conf.num_rotations) + rotmats = rotmat2d(angles / 180 * np.pi) + self.num_rotations = num_rotations + self.register_buffer("rotmats", rotmats, persistent=False) + + def forward(self, image_map, p2d_bev): + h, w = image_map.shape[-2:] + locations = make_grid(w, h, device=p2d_bev.device) + p2d_candidates = torch.einsum( + "kji,...i,->...kj", self.rotmats.to(p2d_bev), p2d_bev + ) + p2d_candidates = p2d_candidates[..., None, None, :, :] + locations.unsqueeze(-1) + # ... 
x N x W x H x K x 2 + + p2d_norm = (p2d_candidates / (image_map.new_tensor([w, h]) - 1)) * 2 - 1 + valid = torch.all((p2d_norm >= -1) & (p2d_norm <= 1), -1) + value = grid_sample( + image_map, p2d_norm.flatten(-4, -2), align_corners=True, mode="bilinear" + ) + value = value.reshape(image_map.shape[:2] + valid.shape[-4]) + return valid, value + + +def sample_xyr(volume, xy_grid, angle_grid, nearest_for_inf=False): + # (B, C, H, W, N) to (B, C, H, W, N+1) + volume_padded = pad(volume, [0, 1, 0, 0, 0, 0], mode="circular") + + size = xy_grid.new_tensor(volume.shape[-3:-1][::-1]) + xy_norm = xy_grid / (size - 1) # align_corners=True + angle_norm = (angle_grid / 360) % 1 + grid = torch.concat([angle_norm.unsqueeze(-1), xy_norm], -1) + grid_norm = grid * 2 - 1 + + valid = torch.all((grid_norm >= -1) & (grid_norm <= 1), -1) + value = grid_sample(volume_padded, grid_norm, align_corners=True, mode="bilinear") + + # if one of the values used for linear interpolation is infinite, + # we fallback to nearest to avoid propagating inf + if nearest_for_inf: + value_nearest = grid_sample( + volume_padded, grid_norm, align_corners=True, mode="nearest" + ) + value = torch.where(~torch.isfinite(value) & valid, value_nearest, value) + + return value, valid + + +def nll_loss_xyr(log_probs, xy, angle): + log_prob, _ = sample_xyr( + log_probs.unsqueeze(1), xy[:, None, None, None], angle[:, None, None, None] + ) + nll = -log_prob.reshape(-1) # remove C,H,W,N + return nll + + +def nll_loss_xyr_smoothed(log_probs, xy, angle, sigma_xy, sigma_r, mask=None): + *_, nx, ny, nr = log_probs.shape + grid_x = torch.arange(nx, device=log_probs.device, dtype=torch.float) + dx = (grid_x - xy[..., None, 0]) / sigma_xy + grid_y = torch.arange(ny, device=log_probs.device, dtype=torch.float) + dy = (grid_y - xy[..., None, 1]) / sigma_xy + dr = ( + torch.arange(0, 360, 360 / nr, device=log_probs.device, dtype=torch.float) + - angle[..., None] + ) % 360 + dr = torch.minimum(dr, 360 - dr) / sigma_r + diff = ( + dx[..., None, :, None] ** 2 + + dy[..., :, None, None] ** 2 + + dr[..., None, None, :] ** 2 + ) + pdf = torch.exp(-diff / 2) + if mask is not None: + pdf.masked_fill_(~mask[..., None], 0) + log_probs = log_probs.masked_fill(~mask[..., None], 0) + pdf /= pdf.sum((-1, -2, -3), keepdim=True) + return -torch.sum(pdf * log_probs.to(torch.float), dim=(-1, -2, -3)) + + +def log_softmax_spatial(x, dims=3): + return log_softmax(x.flatten(-dims), dim=-1).reshape(x.shape) + + +@torch.jit.script +def argmax_xy(scores: torch.Tensor) -> torch.Tensor: + indices = scores.flatten(-2).max(-1).indices + width = scores.shape[-1] + x = indices % width + y = torch.div(indices, width, rounding_mode="floor") + return torch.stack((x, y), -1) + + +@torch.jit.script +def expectation_xy(prob: torch.Tensor) -> torch.Tensor: + h, w = prob.shape[-2:] + grid = make_grid(float(w), float(h), device=prob.device).to(prob) + return torch.einsum("...hw,hwd->...d", prob, grid) + + +@torch.jit.script +def expectation_xyr( + prob: torch.Tensor, covariance: bool = False +) -> Tuple[torch.Tensor, Optional[torch.Tensor]]: + h, w, num_rotations = prob.shape[-3:] + x, y = torch.meshgrid( + [ + torch.arange(w, device=prob.device, dtype=prob.dtype), + torch.arange(h, device=prob.device, dtype=prob.dtype), + ], + indexing="xy", + ) + grid_xy = torch.stack((x, y), -1) + xy_mean = torch.einsum("...hwn,hwd->...d", prob, grid_xy) + + angles = torch.arange(0, 1, 1 / num_rotations, device=prob.device, dtype=prob.dtype) + angles = angles * 2 * np.pi + grid_cs = 
torch.stack([torch.cos(angles), torch.sin(angles)], -1)
+    cs_mean = torch.einsum("...hwn,nd->...d", prob, grid_cs)
+    angle = torch.atan2(cs_mean[..., 1], cs_mean[..., 0])
+    angle = (angle * 180 / np.pi) % 360
+
+    if covariance:
+        xy_cov = torch.einsum("...hwn,...hwd,...hwk->...dk", prob, grid_xy, grid_xy)
+        xy_cov = xy_cov - torch.einsum("...d,...k->...dk", xy_mean, xy_mean)
+    else:
+        xy_cov = None
+
+    xyr_mean = torch.cat((xy_mean, angle.unsqueeze(-1)), -1)
+    return xyr_mean, xy_cov
+
+
+@torch.jit.script
+def argmax_xyr(scores: torch.Tensor) -> torch.Tensor:
+    indices = scores.flatten(-3).max(-1).indices
+    width, num_rotations = scores.shape[-2:]
+    wr = width * num_rotations
+    y = torch.div(indices, wr, rounding_mode="floor")
+    x = torch.div(indices % wr, num_rotations, rounding_mode="floor")
+    angle_index = indices % num_rotations
+    angle = angle_index * 360 / num_rotations
+    xyr = torch.stack((x, y, angle), -1)
+    return xyr
+
+
+def argmax_xyrh(scores: torch.Tensor) -> torch.Tensor:
+    # Find the maximum score and its flat index over the (x, y, rotation, height)
+    # volume, then convert the flat index back to per-dimension coordinates.
+    # Note: the index decomposition below assumes a square grid (h == w).
+    batch, h, w, num_rotations, height = scores.shape
+    max_values, max_indices = torch.max(scores.view(batch, -1), dim=1)
+    max_indices = torch.stack(
+        [
+            torch.div(max_indices % (h * num_rotations * height), num_rotations * height, rounding_mode="trunc"),
+            torch.div(max_indices, h * num_rotations * height, rounding_mode="trunc"),
+            torch.div(max_indices % (num_rotations * height), height, rounding_mode="trunc"),
+            max_indices % height,
+        ],
+        dim=1,
+    )
+    # Convert the rotation index to degrees; writing it back into the integer
+    # index tensor truncates the angle to whole degrees.
+    angle = max_indices[..., 2] * 360 / num_rotations
+    max_indices[..., 2] = angle
+    return max_indices
+
+
+@torch.jit.script
+def mask_yaw_prior(
+    scores: torch.Tensor, yaw_prior: torch.Tensor, num_rotations: int
+) -> torch.Tensor:
+    step = 360 / num_rotations
+    step_2 = step / 2
+    angles = torch.arange(step_2, 360 + step_2, step, device=scores.device)
+    yaw_init, yaw_range = yaw_prior.chunk(2, dim=-1)
+    rot_mask = angle_error(angles, yaw_init) < yaw_range
+    return scores.masked_fill_(~rot_mask[:, None, None], -np.inf)
+
+
+def fuse_gps(log_prob, uv_gps, ppm, sigma=10, gaussian=False):
+    grid = make_grid(*log_prob.shape[-3:-1][::-1]).to(log_prob)
+    dist = torch.sum((grid - uv_gps) ** 2, -1)
+    sigma_pixel = sigma * ppm
+    if gaussian:
+        gps_log_prob = -1 / 2 * dist / sigma_pixel**2
+    else:
+        gps_log_prob = torch.where(dist < sigma_pixel**2, 1, -np.inf)
+    log_prob_fused = log_softmax_spatial(log_prob + gps_log_prob.unsqueeze(-1))
+    return log_prob_fused
diff --git a/osm/analysis.py b/osm/analysis.py new file mode 100644 index 0000000000000000000000000000000000000000..a667c21373a31482f7bbcfb41d4fa14681741260 --- /dev/null +++ b/osm/analysis.py @@ -0,0 +1,182 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
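For reference, the exhaustive-matching utilities defined just above (before osm/analysis.py) are typically combined as in the sketch below. This is a minimal illustration only: the import path, tensor shapes, and all numeric values are assumptions, not taken from this diff.

import torch

# Assumed module path; adjust to wherever the functions above actually live.
from models.voting import (
    argmax_xyr, expectation_xyr, fuse_gps, log_softmax_spatial, nll_loss_xyr,
)

b, h, w, r = 2, 64, 64, 64                    # batch, map height/width, rotation bins (illustrative)
scores = torch.randn(b, h, w, r)              # raw matching scores over (x, y, yaw)
log_probs = log_softmax_spatial(scores)       # joint log-distribution over the last three dims

xyr_mode = argmax_xyr(log_probs)              # (b, 3): most likely (x, y, yaw in degrees)
xyr_mean, xy_cov = expectation_xyr(log_probs.exp(), covariance=True)  # soft estimate + 2x2 position covariance

uv_gps = torch.tensor([32.0, 32.0])           # GPS prior expressed in map pixels (hypothetical)
log_probs_fused = fuse_gps(log_probs, uv_gps, ppm=1, sigma=10)

xy_gt = torch.tensor([[20.0, 30.0], [10.0, 5.0]])   # ground-truth positions (hypothetical)
yaw_gt = torch.tensor([45.0, 180.0])                # ground-truth headings in degrees
nll = nll_loss_xyr(log_probs_fused, xy_gt, yaw_gt)  # per-sample negative log-likelihood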
+ +from collections import Counter, defaultdict +from typing import Dict + +import matplotlib.pyplot as plt +import numpy as np +import plotly.graph_objects as go + +from .parser import ( + filter_area, + filter_node, + filter_way, + match_to_group, + parse_area, + parse_node, + parse_way, + Patterns, +) +from .reader import OSMData + + +def recover_hierarchy(counter: Counter) -> Dict: + """Recover a two-level hierarchy from the flat group labels.""" + groups = defaultdict(dict) + for k, v in sorted(counter.items(), key=lambda x: -x[1]): + if ":" in k: + prefix, group = k.split(":") + if prefix in groups and isinstance(groups[prefix], int): + groups[prefix] = {} + groups[prefix][prefix] = groups[prefix] + groups[prefix] = {} + groups[prefix][group] = v + else: + groups[k] = v + return dict(groups) + + +def bar_autolabel(rects, fontsize): + """Attach a text label above each bar in *rects*, displaying its height.""" + for rect in rects: + width = rect.get_width() + plt.gca().annotate( + f"{width}", + xy=(width, rect.get_y() + rect.get_height() / 2), + xytext=(3, 0), # 3 points vertical offset + textcoords="offset points", + ha="left", + va="center", + fontsize=fontsize, + ) + + +def plot_histogram(counts, fontsize, dpi): + fig, ax = plt.subplots(dpi=dpi, figsize=(8, 20)) + + labels = [] + for k, v in counts.items(): + if isinstance(v, dict): + labels += list(v.keys()) + v = list(v.values()) + else: + labels.append(k) + v = [v] + bars = plt.barh( + len(labels) + -len(v) + np.arange(len(v)), v, height=0.9, label=k + ) + bar_autolabel(bars, fontsize) + + ax.set_yticklabels(labels, fontsize=fontsize) + ax.axes.xaxis.set_ticklabels([]) + ax.xaxis.tick_top() + ax.invert_yaxis() + plt.yticks(np.arange(len(labels))) + plt.xscale("log") + plt.legend(ncol=len(counts), loc="upper center") + + +def count_elements(elems: Dict[int, str], filter_fn, parse_fn) -> Dict: + """Count the number of elements in each group.""" + counts = Counter() + for elem in filter(filter_fn, elems.values()): + group = parse_fn(elem.tags) + if group is None: + continue + counts[group] += 1 + counts = recover_hierarchy(counts) + return counts + + +def plot_osm_histograms(osm: OSMData, fontsize=8, dpi=150): + counts = count_elements(osm.nodes, filter_node, parse_node) + plot_histogram(counts, fontsize, dpi) + plt.title("nodes") + + counts = count_elements(osm.ways, filter_way, parse_way) + plot_histogram(counts, fontsize, dpi) + plt.title("ways") + + counts = count_elements(osm.ways, filter_area, parse_area) + plot_histogram(counts, fontsize, dpi) + plt.title("areas") + + +def plot_sankey_hierarchy(osm: OSMData): + triplets = [] + for node in filter(filter_node, osm.nodes.values()): + label = parse_node(node.tags) + if label is None: + continue + group = match_to_group(label, Patterns.nodes) + if group is None: + group = match_to_group(label, Patterns.ways) + if group is None: + group = "null" + if ":" in label: + key, tag = label.split(":") + if tag == "yes": + tag = key + else: + key = tag = label + triplets.append((key, tag, group)) + keys, tags, groups = list(zip(*triplets)) + counts_key_tag = Counter(zip(keys, tags)) + counts_key_tag_group = Counter(triplets) + + key2tags = defaultdict(set) + for k, t in zip(keys, tags): + key2tags[k].add(t) + key2tags = {k: sorted(t) for k, t in key2tags.items()} + keytag2group = dict(zip(zip(keys, tags), groups)) + key_names = sorted(set(keys)) + tag_names = [(k, t) for k in key_names for t in key2tags[k]] + + group_names = [] + for k in key_names: + for t in key2tags[k]: + g = 
keytag2group[k, t] + if g not in group_names and g != "null": + group_names.append(g) + group_names += ["null"] + + key2idx = dict(zip(key_names, range(len(key_names)))) + tag2idx = {kt: i + len(key2idx) for i, kt in enumerate(tag_names)} + group2idx = {n: i + len(key2idx) + len(tag2idx) for i, n in enumerate(group_names)} + + key_counts = Counter(keys) + key_text = [f"{k} {key_counts[k]}" for k in key_names] + tag_counts = Counter(list(zip(keys, tags))) + tag_text = [f"{t} {tag_counts[k, t]}" for k, t in tag_names] + group_counts = Counter(groups) + group_text = [f"{k} {group_counts[k]}" for k in group_names] + + fig = go.Figure( + data=[ + go.Sankey( + orientation="h", + node=dict( + pad=15, + thickness=20, + line=dict(color="black", width=0.5), + label=key_text + tag_text + group_text, + x=[0] * len(key_names) + + [1] * len(tag_names) + + [2] * len(group_names), + color="blue", + ), + arrangement="fixed", + link=dict( + source=[key2idx[k] for k, _ in counts_key_tag] + + [tag2idx[k, t] for k, t, _ in counts_key_tag_group], + target=[tag2idx[k, t] for k, t in counts_key_tag] + + [group2idx[g] for _, _, g in counts_key_tag_group], + value=list(counts_key_tag.values()) + + list(counts_key_tag_group.values()), + ), + ) + ] + ) + fig.update_layout(autosize=False, width=800, height=2000, font_size=10) + fig.show() + return fig diff --git a/osm/data.py b/osm/data.py new file mode 100644 index 0000000000000000000000000000000000000000..dafc568f8ad5ac8c72ea9ffbd096838e0693ad84 --- /dev/null +++ b/osm/data.py @@ -0,0 +1,230 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. + +import logging +from dataclasses import dataclass, field +from typing import Dict, List, Optional, Set, Tuple + +import numpy as np + +from .parser import ( + filter_area, + filter_node, + filter_way, + match_to_group, + parse_area, + parse_node, + parse_way, + Patterns, +) +from .reader import OSMData, OSMNode, OSMRelation, OSMWay + + +logger = logging.getLogger(__name__) + + +def glue(ways: List[OSMWay]) -> List[List[OSMNode]]: + result: List[List[OSMNode]] = [] + to_process: Set[Tuple[OSMNode]] = set() + + for way in ways: + if way.is_cycle(): + result.append(way.nodes) + else: + to_process.add(tuple(way.nodes)) + + while to_process: + nodes: List[OSMNode] = list(to_process.pop()) + glued: Optional[List[OSMNode]] = None + other_nodes: Optional[Tuple[OSMNode]] = None + + for other_nodes in to_process: + glued = try_to_glue(nodes, list(other_nodes)) + if glued is not None: + break + + if glued is not None: + to_process.remove(other_nodes) + if is_cycle(glued): + result.append(glued) + else: + to_process.add(tuple(glued)) + else: + result.append(nodes) + + return result + + +def is_cycle(nodes: List[OSMNode]) -> bool: + """Is way a cycle way or an area boundary.""" + return nodes[0] == nodes[-1] + + +def try_to_glue(nodes: List[OSMNode], other: List[OSMNode]) -> Optional[List[OSMNode]]: + """Create new combined way if ways share endpoints.""" + if nodes[0] == other[0]: + return list(reversed(other[1:])) + nodes + if nodes[0] == other[-1]: + return other[:-1] + nodes + if nodes[-1] == other[-1]: + return nodes + list(reversed(other[:-1])) + if nodes[-1] == other[0]: + return nodes + other[1:] + return None + + +def multipolygon_from_relation(rel: OSMRelation, osm: OSMData): + inner_ways = [] + outer_ways = [] + for member in rel.members: + if member.type_ == "way": + if member.role == "inner": + if member.ref in osm.ways: + inner_ways.append(osm.ways[member.ref]) + elif member.role == "outer": + if member.ref in 
osm.ways: + outer_ways.append(osm.ways[member.ref]) + else: + logger.warning(f'Unknown member role "{member.role}".') + if outer_ways: + inners_path = glue(inner_ways) + outers_path = glue(outer_ways) + return inners_path, outers_path + + +@dataclass +class MapElement: + id_: int + label: str + group: str + tags: Optional[Dict[str, str]] + + +@dataclass +class MapNode(MapElement): + xy: np.ndarray + + @classmethod + def from_osm(cls, node: OSMNode, label: str, group: str): + return cls( + node.id_, + label, + group, + node.tags, + xy=node.xy, + ) + + +@dataclass +class MapLine(MapElement): + xy: np.ndarray + + @classmethod + def from_osm(cls, way: OSMWay, label: str, group: str): + xy = np.stack([n.xy for n in way.nodes]) + return cls( + way.id_, + label, + group, + way.tags, + xy=xy, + ) + + +@dataclass +class MapArea(MapElement): + outers: List[np.ndarray] + inners: List[np.ndarray] = field(default_factory=list) + + @classmethod + def from_relation(cls, rel: OSMRelation, label: str, group: str, osm: OSMData): + outers_inners = multipolygon_from_relation(rel, osm) + if outers_inners is None: + return None + outers, inners = outers_inners + outers = [np.stack([n.xy for n in way]) for way in outers] + inners = [np.stack([n.xy for n in way]) for way in inners] + return cls( + rel.id_, + label, + group, + rel.tags, + outers=outers, + inners=inners, + ) + + @classmethod + def from_way(cls, way: OSMWay, label: str, group: str): + xy = np.stack([n.xy for n in way.nodes]) + return cls( + way.id_, + label, + group, + way.tags, + outers=[xy], + ) + + +class MapData: + def __init__(self): + self.nodes: Dict[int, MapNode] = {} + self.lines: Dict[int, MapLine] = {} + self.areas: Dict[int, MapArea] = {} + + @classmethod + def from_osm(cls, osm: OSMData): + self = cls() + + for node in filter(filter_node, osm.nodes.values()): + label = parse_node(node.tags) + if label is None: + continue + group = match_to_group(label, Patterns.nodes) + if group is None: + group = match_to_group(label, Patterns.ways) + if group is None: + continue # missing + self.nodes[node.id_] = MapNode.from_osm(node, label, group) + + for way in filter(filter_way, osm.ways.values()): + label = parse_way(way.tags) + if label is None: + continue + group = match_to_group(label, Patterns.ways) + if group is None: + group = match_to_group(label, Patterns.nodes) + if group is None: + continue # missing + self.lines[way.id_] = MapLine.from_osm(way, label, group) + + for area in filter(filter_area, osm.ways.values()): + label = parse_area(area.tags) + if label is None: + continue + group = match_to_group(label, Patterns.areas) + if group is None: + group = match_to_group(label, Patterns.ways) + if group is None: + group = match_to_group(label, Patterns.nodes) + if group is None: + continue # missing + self.areas[area.id_] = MapArea.from_way(area, label, group) + + for rel in osm.relations.values(): + if rel.tags.get("type") != "multipolygon": + continue + label = parse_area(rel.tags) + if label is None: + continue + group = match_to_group(label, Patterns.areas) + if group is None: + group = match_to_group(label, Patterns.ways) + if group is None: + group = match_to_group(label, Patterns.nodes) + if group is None: + continue # missing + area = MapArea.from_relation(rel, label, group, osm) + assert rel.id_ not in self.areas # not sure if there can be collision + if area is not None: + self.areas[rel.id_] = area + + return self diff --git a/osm/download.py b/osm/download.py new file mode 100644 index 
0000000000000000000000000000000000000000..abaaaf4d6fb7c8c3034db8a152bbf4cff6135dc2 --- /dev/null +++ b/osm/download.py @@ -0,0 +1,119 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+
+import json
+from pathlib import Path
+from typing import Dict, Optional
+
+import urllib3
+
+
+from utils.geo import BoundaryBox
+import urllib.request
+import requests
+
+
+def get_osm(
+    boundary_box: BoundaryBox,
+    cache_path: Optional[Path] = None,
+    overwrite: bool = False,
+) -> str:
+    if not overwrite and cache_path is not None and cache_path.is_file():
+        with cache_path.open() as fp:
+            return json.load(fp)
+
+    (bottom, left), (top, right) = boundary_box.min_, boundary_box.max_
+    content: bytes = get_web_data(
+        # "https://api.openstreetmap.org/api/0.6/map.json",
+        # "https://openstreetmap.erniubot.live/api/0.6/map.json",
+        # "https://openstreetmap.erniuapi.com/api/0.6/map.json",
+        'https://osm.erniuapi.org/api/0.6/map.json',
+        # 'http://localhost:29505/api/map',
+        # "https://lz4.overpass-api.de/api/interpreter",
+        {"bbox": f"{left},{bottom},{right},{top}"},
+    )
+
+    content_str = content.decode("utf-8")
+    if content_str.startswith("You requested too many nodes"):
+        raise ValueError(content_str)
+
+    if cache_path is not None:
+        with cache_path.open("bw+") as fp:
+            fp.write(content)
+    return json.loads(content_str)
+
+
+def get_web_data(address: str, parameters: Dict[str, str]) -> bytes:
+    """Send a GET request and return the raw response, retrying on failure."""
+    # logger.info("Getting %s...", address)
+    # Optional proxy configuration:
+    # proxy_address = "http://107.173.122.186:3128"
+    # proxies = {
+    #     'http': proxy_address,
+    #     'https': proxy_address
+    # }
+    # response = requests.get(address, params=parameters, timeout=100, proxies=proxies)
+    while True:
+        try:
+            response = requests.get(address, params=parameters, timeout=100)
+            break
+        except Exception as e:
+            print(f"Request failed: {e}")
+            print("Retrying...")
+    return response.content
+
+
+# def get_web_data_2(address: str, parameters: Dict[str, str]) -> bytes:
+#     # logger.info("Getting %s...", address)
+#     proxy_address = "http://107.173.122.186:3128"
+#     http = urllib3.PoolManager(proxy_url=proxy_address)
+#     result = http.request("GET", address, parameters, timeout=100)
+#     return result.data
+#
+#
+# def get_web_data_1(address: str, parameters: Dict[str, str]) -> bytes:
+#     # Set the proxy server address and port.
+#     proxy_address = "http://107.173.122.186:3128"
+#
+#     # Create a ProxyHandler object.
+#     proxy_handler = urllib.request.ProxyHandler({'http': proxy_address})
+#
+#     # Build the query string.
+#     query_string = urllib.parse.urlencode(parameters)
+#
+#     # Build the full URL.
+#     url = address + '?'
+ query_string +# print(url) +# # 创建OpenerDirector对象,并将ProxyHandler对象作为参数传递 +# opener = urllib.request.build_opener(proxy_handler) +# +# # 使用OpenerDirector对象发送请求 +# response = opener.open(url) +# +# # 发送GET请求 +# # response = urllib.request.urlopen(url, timeout=100) +# +# # 读取响应内容 +# data = response.read() +# print() +# return data \ No newline at end of file diff --git a/osm/parser.py b/osm/parser.py new file mode 100644 index 0000000000000000000000000000000000000000..d235c71bdbdd22d280d60015b5941d25ba0345e1 --- /dev/null +++ b/osm/parser.py @@ -0,0 +1,255 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. + +import logging +import re +from typing import List + +from .reader import OSMData, OSMElement, OSMNode, OSMWay + +IGNORE_TAGS = {"source", "phone", "entrance", "inscription", "note", "name"} + + +def parse_levels(string: str) -> List[float]: + """Parse string representation of level sequence value.""" + try: + cleaned = string.replace(",", ";").replace(" ", "") + return list(map(float, cleaned.split(";"))) + except ValueError: + logging.debug("Cannot parse level description from `%s`.", string) + return [] + + +def filter_level(elem: OSMElement): + level = elem.tags.get("level") + if level is not None: + levels = parse_levels(level) + # In the US, ground floor levels are sometimes marked as level=1 + # so let's be conservative and include it. + if not (0 in levels or 1 in levels): + return False + layer = elem.tags.get("layer") + if layer is not None: + layer = parse_levels(layer) + if len(layer) > 0 and max(layer) < 0: + return False + return ( + elem.tags.get("location") != "underground" + and elem.tags.get("parking") != "underground" + ) + + +def filter_node(node: OSMNode): + return len(node.tags.keys() - IGNORE_TAGS) > 0 and filter_level(node) + + +def is_area(way: OSMWay): + if way.nodes[0] != way.nodes[-1]: + return False + if way.tags.get("area") == "no": + return False + filters = [ + "area", + "building", + "amenity", + "indoor", + "landuse", + "landcover", + "leisure", + "public_transport", + "shop", + ] + for f in filters: + if f in way.tags and way.tags.get(f) != "no": + return True + if way.tags.get("natural") in {"wood", "grassland", "water"}: + return True + return False + + +def filter_area(way: OSMWay): + return len(way.tags.keys() - IGNORE_TAGS) > 0 and is_area(way) and filter_level(way) + + +def filter_way(way: OSMWay): + return not filter_area(way) and way.tags != {} and filter_level(way) + + +def parse_node(tags): + keys = tags.keys() + for key in [ + "amenity", + "natural", + "highway", + "barrier", + "shop", + "tourism", + "public_transport", + "emergency", + "man_made", + ]: + if key in keys: + if "disused" in tags[key]: + continue + return f"{key}:{tags[key]}" + return None + + +def parse_area(tags): + if "building" in tags: + group = "building" + kind = tags["building"] + if kind == "yes": + for key in ["amenity", "tourism"]: + if key in tags: + kind = tags[key] + break + if kind != "yes": + group += f":{kind}" + return group + if "area:highway" in tags: + return f'highway:{tags["area:highway"]}' + for key in [ + "amenity", + "landcover", + "leisure", + "shop", + "highway", + "tourism", + "natural", + "waterway", + "landuse", + ]: + if key in tags: + return f"{key}:{tags[key]}" + return None + + +def parse_way(tags): + keys = tags.keys() + for key in ["highway", "barrier", "natural"]: + if key in keys: + return f"{key}:{tags[key]}" + return None + + +def match_to_group(label, patterns): + for group, pattern in patterns.items(): + if re.match(pattern, 
label): + return group + return None + + +class Patterns: + areas = dict( + building="building($|:.*?)*", + parking="amenity:parking", + playground="leisure:(playground|pitch)", + grass="(landuse:grass|landcover:grass|landuse:meadow|landuse:flowerbed|natural:grassland)", + park="leisure:(park|garden|dog_park)", + forest="(landuse:forest|natural:wood)", + water="(natural:water|waterway:*)", + ) + # + ways: road, path + # + node: fountain, bicycle_parking + + ways = dict( + fence="barrier:(fence|yes)", + wall="barrier:(wall|retaining_wall)", + hedge="barrier:hedge", + kerb="barrier:kerb", + building_outline="building($|:.*?)*", + cycleway="highway:cycleway", + path="highway:(pedestrian|footway|steps|path|corridor)", + road="highway:(motorway|trunk|primary|secondary|tertiary|service|construction|track|unclassified|residential|.*_link)", + busway="highway:busway", + tree_row="natural:tree_row", # maybe merge with node? + ) + # + nodes: bollard + + nodes = dict( + tree="natural:tree", + stone="(natural:stone|barrier:block)", + crossing="highway:crossing", + lamp="highway:street_lamp", + traffic_signal="highway:traffic_signals", + bus_stop="highway:bus_stop", + stop_sign="highway:stop", + junction="highway:motorway_junction", + bus_stop_position="public_transport:stop_position", + gate="barrier:(gate|lift_gate|swing_gate|cycle_barrier)", + bollard="barrier:bollard", + shop="(shop.*?|amenity:(bank|post_office))", + restaurant="amenity:(restaurant|fast_food)", + bar="amenity:(cafe|bar|pub|biergarten)", + pharmacy="amenity:pharmacy", + fuel="amenity:fuel", + bicycle_parking="amenity:(bicycle_parking|bicycle_rental)", + charging_station="amenity:charging_station", + parking_entrance="amenity:parking_entrance", + atm="amenity:atm", + toilets="amenity:toilets", + vending_machine="amenity:vending_machine", + fountain="amenity:fountain", + waste_basket="amenity:(waste_basket|waste_disposal)", + bench="amenity:bench", + post_box="amenity:post_box", + artwork="tourism:artwork", + recycling="amenity:recycling", + give_way="highway:give_way", + clock="amenity:clock", + fire_hydrant="emergency:fire_hydrant", + pole="man_made:(flagpole|utility_pole)", + street_cabinet="man_made:street_cabinet", + ) + # + ways: kerb + + +class Groups: + areas = list(Patterns.areas) + ways = list(Patterns.ways) + nodes = list(Patterns.nodes) + + +def group_elements(osm: OSMData): + elem2group = { + "area": {}, + "way": {}, + "node": {}, + } + + for node in filter(filter_node, osm.nodes.values()): + label = parse_node(node.tags) + if label is None: + continue + group = match_to_group(label, Patterns.nodes) + if group is None: + group = match_to_group(label, Patterns.ways) + if group is None: + continue # missing + elem2group["node"][node.id_] = group + + for way in filter(filter_way, osm.ways.values()): + label = parse_way(way.tags) + if label is None: + continue + group = match_to_group(label, Patterns.ways) + if group is None: + group = match_to_group(label, Patterns.nodes) + if group is None: + continue # missing + elem2group["way"][way.id_] = group + + for area in filter(filter_area, osm.ways.values()): + label = parse_area(area.tags) + if label is None: + continue + group = match_to_group(label, Patterns.areas) + if group is None: + group = match_to_group(label, Patterns.ways) + if group is None: + group = match_to_group(label, Patterns.nodes) + if group is None: + continue # missing + elem2group["area"][area.id_] = group + + return elem2group diff --git a/osm/raster.py b/osm/raster.py new file mode 100644 index 
0000000000000000000000000000000000000000..9e203bc50db0d873a9c8544ed0a1533c84a38df9 --- /dev/null +++ b/osm/raster.py @@ -0,0 +1,103 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. + +from typing import Dict, List + +import cv2 +import numpy as np +import torch + +from utils.geo import BoundaryBox +from .data import MapArea, MapLine, MapNode +from .parser import Groups + + +class Canvas: + def __init__(self, bbox: BoundaryBox, ppm: float): + self.bbox = bbox + self.ppm = ppm + self.scaling = bbox.size * ppm + self.w, self.h = np.ceil(self.scaling).astype(int) + self.clear() + + def clear(self): + self.raster = np.zeros((self.h, self.w), np.uint8) + + def to_uv(self, xy: np.ndarray): + xy = self.bbox.normalize(xy) + xy[..., 1] = 1 - xy[..., 1] + s = self.scaling + if isinstance(xy, torch.Tensor): + s = torch.from_numpy(s).to(xy) + return xy * s - 0.5 + + def to_xy(self, uv: np.ndarray): + s = self.scaling + if isinstance(uv, torch.Tensor): + s = torch.from_numpy(s).to(uv) + xy = (uv + 0.5) / s + xy[..., 1] = 1 - xy[..., 1] + return self.bbox.unnormalize(xy) + + def draw_polygon(self, xy: np.ndarray): + uv = self.to_uv(xy) + cv2.fillPoly(self.raster, uv[None].astype(np.int32), 255) + + def draw_multipolygon(self, xys: List[np.ndarray]): + uvs = [self.to_uv(xy).round().astype(np.int32) for xy in xys] + cv2.fillPoly(self.raster, uvs, 255) + + def draw_line(self, xy: np.ndarray, width: float = 1): + uv = self.to_uv(xy) + cv2.polylines( + self.raster, uv[None].round().astype(np.int32), False, 255, thickness=width + ) + + def draw_cell(self, xy: np.ndarray): + if not self.bbox.contains(xy): + return + uv = self.to_uv(xy) + self.raster[tuple(uv.round().astype(int).T[::-1])] = 255 + + +def render_raster_masks( + nodes: List[MapNode], + lines: List[MapLine], + areas: List[MapArea], + canvas: Canvas, +) -> Dict[str, np.ndarray]: + all_groups = Groups.areas + Groups.ways + Groups.nodes + masks = {k: np.zeros((canvas.h, canvas.w), np.uint8) for k in all_groups} + + for area in areas: + canvas.raster = masks[area.group] + outlines = area.outers + area.inners + canvas.draw_multipolygon(outlines) + if area.group == "building": + canvas.raster = masks["building_outline"] + for line in outlines: + canvas.draw_line(line) + + for line in lines: + canvas.raster = masks[line.group] + canvas.draw_line(line.xy) + + for node in nodes: + canvas.raster = masks[node.group] + canvas.draw_cell(node.xy) + + return masks + + +def mask_to_idx(group2mask: Dict[str, np.ndarray], groups: List[str]) -> np.ndarray: + masks = np.stack([group2mask[k] for k in groups]) > 0 + void = ~np.any(masks, 0) + idx = np.argmax(masks, 0) + idx = np.where(void, np.zeros_like(idx), idx + 1) # add background + return idx + + +def render_raster_map(masks: Dict[str, np.ndarray]) -> np.ndarray: + areas = mask_to_idx(masks, Groups.areas) + ways = mask_to_idx(masks, Groups.ways) + nodes = mask_to_idx(masks, Groups.nodes) + return np.stack([areas, ways, nodes]) diff --git a/osm/reader.py b/osm/reader.py new file mode 100644 index 0000000000000000000000000000000000000000..793ad1879f8b2068cd5265bed408be14719e9680 --- /dev/null +++ b/osm/reader.py @@ -0,0 +1,310 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. 
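To make the rasterization utilities defined above in osm/raster.py concrete, here is a minimal sketch of Canvas used on its own. It assumes the package layout of this diff (osm/raster.py, utils/geo.py) is importable; the coordinates are made up. In the full pipeline, render_raster_masks and render_raster_map then turn MapData elements drawn on such canvases into the 3-channel (areas, ways, nodes) label raster.

import numpy as np

from osm.raster import Canvas
from utils.geo import BoundaryBox

bbox = BoundaryBox(np.array([0.0, 0.0]), np.array([64.0, 64.0]))  # a 64 m x 64 m tile in metric XY
canvas = Canvas(bbox, ppm=2)                                      # 2 pixels per meter -> 128 x 128 raster
square = np.array([[8.0, 8.0], [24.0, 8.0], [24.0, 24.0], [8.0, 24.0]])
canvas.draw_polygon(square)                 # fills the polygon with 255 in canvas.raster (uint8, H x W)
uv = canvas.to_uv(np.array([16.0, 16.0]))   # metric XY -> pixel UV; the v axis points down
xy = canvas.to_xy(uv)                       # round-trip back to metric coordinates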
+ +import json +import re +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Dict, List, Optional + +from lxml import etree +import numpy as np + +from utils.geo import BoundaryBox, Projection + +METERS_PATTERN: re.Pattern = re.compile("^(?P\\d*\\.?\\d*)\\s*m$") +KILOMETERS_PATTERN: re.Pattern = re.compile("^(?P\\d*\\.?\\d*)\\s*km$") +MILES_PATTERN: re.Pattern = re.compile("^(?P\\d*\\.?\\d*)\\s*mi$") + + +def parse_float(string: str) -> Optional[float]: + """Parse string representation of a float or integer value.""" + try: + return float(string) + except (TypeError, ValueError): + return None + + +@dataclass(eq=False) +class OSMElement: + """ + Something with tags (string to string mapping). + """ + + id_: int + tags: Dict[str, str] + + def get_float(self, key: str) -> Optional[float]: + """Parse float from tag value.""" + if key in self.tags: + return parse_float(self.tags[key]) + return None + + def get_length(self, key: str) -> Optional[float]: + """Get length in meters.""" + if key not in self.tags: + return None + + value: str = self.tags[key] + + float_value: float = parse_float(value) + if float_value is not None: + return float_value + + for pattern, ratio in [ + (METERS_PATTERN, 1.0), + (KILOMETERS_PATTERN, 1000.0), + (MILES_PATTERN, 1609.344), + ]: + matcher: re.Match = pattern.match(value) + if matcher: + float_value: float = parse_float(matcher.group("value")) + if float_value is not None: + return float_value * ratio + + return None + + def __hash__(self) -> int: + return self.id_ + + +@dataclass(eq=False) +class OSMNode(OSMElement): + """ + OpenStreetMap node. + + See https://wiki.openstreetmap.org/wiki/Node + """ + + geo: np.ndarray + visible: Optional[str] = None + xy: Optional[np.ndarray] = None + + @classmethod + def from_dict(cls, structure: Dict[str, Any]) -> "OSMNode": + """ + Parse node from Overpass-like structure. + + :param structure: input structure + """ + return cls( + structure["id"], + structure.get("tags", {}), + geo=np.array((structure["lat"], structure["lon"])), + visible=structure.get("visible"), + ) + + +@dataclass(eq=False) +class OSMWay(OSMElement): + """ + OpenStreetMap way. + + See https://wiki.openstreetmap.org/wiki/Way + """ + + nodes: Optional[List[OSMNode]] = field(default_factory=list) + visible: Optional[str] = None + + @classmethod + def from_dict( + cls, structure: Dict[str, Any], nodes: Dict[int, OSMNode] + ) -> "OSMWay": + """ + Parse way from Overpass-like structure. + + :param structure: input structure + :param nodes: node structure + """ + return cls( + structure["id"], + structure.get("tags", {}), + [nodes[x] for x in structure["nodes"]], + visible=structure.get("visible"), + ) + + def is_cycle(self) -> bool: + """Is way a cycle way or an area boundary.""" + return self.nodes[0] == self.nodes[-1] + + def __repr__(self) -> str: + return f"Way <{self.id_}> {self.nodes}" + + +@dataclass +class OSMMember: + """ + Member of OpenStreetMap relation. + """ + + type_: str + ref: int + role: str + + +@dataclass(eq=False) +class OSMRelation(OSMElement): + """ + OpenStreetMap relation. + + See https://wiki.openstreetmap.org/wiki/Relation + """ + + members: Optional[List[OSMMember]] + visible: Optional[str] = None + + @classmethod + def from_dict(cls, structure: Dict[str, Any]) -> "OSMRelation": + """ + Parse relation from Overpass-like structure. 
+ + :param structure: input structure + """ + return cls( + structure["id"], + structure["tags"], + [OSMMember(x["type"], x["ref"], x["role"]) for x in structure["members"]], + visible=structure.get("visible"), + ) + + +class OSMData: + """ + The whole OpenStreetMap information about nodes, ways, and relations. + """ + + def __init__(self) -> None: + self.nodes: Dict[int, OSMNode] = {} + self.ways: Dict[int, OSMWay] = {} + self.relations: Dict[int, OSMRelation] = {} + self.box: BoundaryBox = None + + @classmethod + def from_dict(cls, structure: Dict[str, Any]): + data = cls() + bounds = structure.get("bounds") + if bounds is not None: + data.box = BoundaryBox( + np.array([bounds["minlat"], bounds["minlon"]]), + np.array([bounds["maxlat"], bounds["maxlon"]]), + ) + + for element in structure["elements"]: + if element["type"] == "node": + node = OSMNode.from_dict(element) + data.add_node(node) + for element in structure["elements"]: + if element["type"] == "way": + way = OSMWay.from_dict(element, data.nodes) + data.add_way(way) + for element in structure["elements"]: + if element["type"] == "relation": + relation = OSMRelation.from_dict(element) + data.add_relation(relation) + + return data + + @classmethod + def from_json(cls, path: Path): + with path.open(encoding='utf-8') as fid: + structure = json.load(fid) + return cls.from_dict(structure) + + @classmethod + def from_xml(cls, path: Path): + root = etree.parse(str(path)).getroot() + structure = {"elements": []} + from tqdm import tqdm + + for elem in tqdm(root): + if elem.tag == "bounds": + structure["bounds"] = { + k: float(elem.attrib[k]) + for k in ("minlon", "minlat", "maxlon", "maxlat") + } + elif elem.tag in {"node", "way", "relation"}: + if elem.tag == "node": + item = { + "id": int(elem.attrib["id"]), + "lat": float(elem.attrib["lat"]), + "lon": float(elem.attrib["lon"]), + "visible": elem.attrib.get("visible"), + "tags": { + x.attrib["k"]: x.attrib["v"] for x in elem if x.tag == "tag" + }, + } + elif elem.tag == "way": + item = { + "id": int(elem.attrib["id"]), + "visible": elem.attrib.get("visible"), + "tags": { + x.attrib["k"]: x.attrib["v"] for x in elem if x.tag == "tag" + }, + "nodes": [int(x.attrib["ref"]) for x in elem if x.tag == "nd"], + } + elif elem.tag == "relation": + item = { + "id": int(elem.attrib["id"]), + "visible": elem.attrib.get("visible"), + "tags": { + x.attrib["k"]: x.attrib["v"] for x in elem if x.tag == "tag" + }, + "members": [ + { + "type": x.attrib["type"], + "ref": int(x.attrib["ref"]), + "role": x.attrib["role"], + } + for x in elem + if x.tag == "member" + ], + } + item["type"] = elem.tag + structure["elements"].append(item) + elem.clear() + del root + return cls.from_dict(structure) + + @classmethod + def from_file(cls, path: Path): + ext = path.suffix + if ext == ".json": + return cls.from_json(path) + elif ext in {".osm", ".xml"}: + return cls.from_xml(path) + else: + raise ValueError(f"Unknown extension for {path}") + + def add_node(self, node: OSMNode): + """Add node and update map parameters.""" + if node.id_ in self.nodes: + raise ValueError(f"Node with duplicate id {node.id_}.") + self.nodes[node.id_] = node + + def add_way(self, way: OSMWay): + """Add way and update map parameters.""" + if way.id_ in self.ways: + raise ValueError(f"Way with duplicate id {way.id_}.") + self.ways[way.id_] = way + + def add_relation(self, relation: OSMRelation): + """Add relation and update map parameters.""" + if relation.id_ in self.relations: + raise ValueError(f"Relation with duplicate id 
{relation.id_}.") + self.relations[relation.id_] = relation + + def add_xy_to_nodes(self, proj: Projection): + nodes = list(self.nodes.values()) + if len(nodes) == 0: + return + geos = np.stack([n.geo for n in nodes], 0) + if proj.bounds is not None: + # For some reasons few nodes are sometimes very far off the initial bbox. + valid = proj.bounds.contains(geos) + if valid.mean() < 0.9: + print("Many nodes are out of the projection bounds.") + xys = np.zeros_like(geos) + xys[valid] = proj.project(geos[valid]) + else: + xys = proj.project(geos) + for xy, node in zip(xys, nodes): + node.xy = xy diff --git a/osm/tiling.py b/osm/tiling.py new file mode 100644 index 0000000000000000000000000000000000000000..7badfdc480370752f8a8ad82922599a5357b0119 --- /dev/null +++ b/osm/tiling.py @@ -0,0 +1,310 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. + +import io +import pickle +from pathlib import Path +from typing import Dict, List, Optional, Tuple + +import numpy as np +from PIL import Image +import rtree + +from utils.geo import BoundaryBox, Projection +from .data import MapData +from .download import get_osm +from .parser import Groups +from .raster import Canvas, render_raster_map, render_raster_masks +from .reader import OSMData, OSMNode, OSMWay + + +class MapIndex: + def __init__( + self, + data: MapData, + ): + self.index_nodes = rtree.index.Index() + for i, node in data.nodes.items(): + self.index_nodes.insert(i, tuple(node.xy) * 2) + + self.index_lines = rtree.index.Index() + for i, line in data.lines.items(): + bbox = tuple(np.r_[line.xy.min(0), line.xy.max(0)]) + self.index_lines.insert(i, bbox) + + self.index_areas = rtree.index.Index() + for i, area in data.areas.items(): + xy = np.concatenate(area.outers + area.inners) + bbox = tuple(np.r_[xy.min(0), xy.max(0)]) + self.index_areas.insert(i, bbox) + + self.data = data + + def query(self, bbox: BoundaryBox) -> Tuple[List[OSMNode], List[OSMWay]]: + query = tuple(np.r_[bbox.min_, bbox.max_]) + ret = [] + for x in ["nodes", "lines", "areas"]: + ids = getattr(self, "index_" + x).intersection(query) + ret.append([getattr(self.data, x)[i] for i in ids]) + return tuple(ret) + + +def bbox_to_slice(bbox: BoundaryBox, canvas: Canvas): + uv_min = np.ceil(canvas.to_uv(bbox.min_)).astype(int) + uv_max = np.ceil(canvas.to_uv(bbox.max_)).astype(int) + slice_ = (slice(uv_max[1], uv_min[1]), slice(uv_min[0], uv_max[0])) + return slice_ + + +def round_bbox(bbox: BoundaryBox, origin: np.ndarray, ppm: int): + bbox = bbox.translate(-origin) + bbox = BoundaryBox(np.round(bbox.min_ * ppm) / ppm, np.round(bbox.max_ * ppm) / ppm) + return bbox.translate(origin) + +class MapTileManager: + def __init__( + self, + osmpath:Path, + ): + + self.osm = OSMData.from_file(osmpath) + + + # @classmethod + def from_bbox( + self, + projection: Projection, + bbox: BoundaryBox, + ppm: int, + tile_size: int = 128, + ): + # bbox_osm = projection.unproject(bbox) + # if path is not None and path.is_file(): + # print(OSMData.from_file) + # osm = OSMData.from_file(path) + # if osm.box is not None: + # assert osm.box.contains(bbox_osm) + # else: + # osm = OSMData.from_dict(get_osm(bbox_osm, path)) + + self.osm.add_xy_to_nodes(projection) + map_data = MapData.from_osm(self.osm) + map_index = MapIndex(map_data) + + bounds_x, bounds_y = [ + np.r_[np.arange(min_, max_, tile_size), max_] + for min_, max_ in zip(bbox.min_, bbox.max_) + ] + bbox_tiles = {} + for i, xmin in enumerate(bounds_x[:-1]): + for j, ymin in enumerate(bounds_y[:-1]): + bbox_tiles[i, j] = BoundaryBox( + [xmin, 
ymin], [bounds_x[i + 1], bounds_y[j + 1]] + ) + + tiles = {} + for ij, bbox_tile in bbox_tiles.items(): + canvas = Canvas(bbox_tile, ppm) + nodes, lines, areas = map_index.query(bbox_tile) + masks = render_raster_masks(nodes, lines, areas, canvas) + canvas.raster = render_raster_map(masks) + tiles[ij] = canvas + + groups = {k: v for k, v in vars(Groups).items() if not k.startswith("__")} + + self.origin = bbox.min_ + self.bbox = bbox + self.tiles = tiles + self.tile_size = tile_size + self.ppm = ppm + self.projection = projection + self.groups = groups + self.map_data = map_data + + return self.query(bbox) + # return cls(tiles, bbox, tile_size, ppm, projection, groups, map_data) + + def query(self, bbox: BoundaryBox) -> Canvas: + bbox = round_bbox(bbox, self.bbox.min_, self.ppm) + canvas = Canvas(bbox, self.ppm) + raster = np.zeros((3, canvas.h, canvas.w), np.uint8) + + bbox_all = bbox & self.bbox + ij_min = np.floor((bbox_all.min_ - self.origin) / self.tile_size).astype(int) + ij_max = np.ceil((bbox_all.max_ - self.origin) / self.tile_size).astype(int) - 1 + for i in range(ij_min[0], ij_max[0] + 1): + for j in range(ij_min[1], ij_max[1] + 1): + tile = self.tiles[i, j] + bbox_select = tile.bbox & bbox + slice_query = bbox_to_slice(bbox_select, canvas) + slice_tile = bbox_to_slice(bbox_select, tile) + raster[(slice(None),) + slice_query] = tile.raster[ + (slice(None),) + slice_tile + ] + canvas.raster = raster + return canvas + + def save(self, path: Path): + dump = { + "bbox": self.bbox.format(), + "tile_size": self.tile_size, + "ppm": self.ppm, + "groups": self.groups, + "tiles_bbox": {}, + "tiles_raster": {}, + } + if self.projection is not None: + dump["ref_latlonalt"] = self.projection.latlonalt + for ij, canvas in self.tiles.items(): + dump["tiles_bbox"][ij] = canvas.bbox.format() + raster_bytes = io.BytesIO() + raster = Image.fromarray(canvas.raster.transpose(1, 2, 0).astype(np.uint8)) + raster.save(raster_bytes, format="PNG") + dump["tiles_raster"][ij] = raster_bytes + with open(path, "wb") as fp: + pickle.dump(dump, fp) + + @classmethod + def load(cls, path: Path): + with path.open("rb") as fp: + dump = pickle.load(fp) + tiles = {} + for ij, bbox in dump["tiles_bbox"].items(): + tiles[ij] = Canvas(BoundaryBox.from_string(bbox), dump["ppm"]) + raster = np.asarray(Image.open(dump["tiles_raster"][ij])) + tiles[ij].raster = raster.transpose(2, 0, 1).copy() + projection = Projection(*dump["ref_latlonalt"]) + return cls( + tiles, + BoundaryBox.from_string(dump["bbox"]), + dump["tile_size"], + dump["ppm"], + projection, + dump["groups"], + ) + +class TileManager: + def __init__( + self, + tiles: Dict, + bbox: BoundaryBox, + tile_size: int, + ppm: int, + projection: Projection, + groups: Dict[str, List[str]], + map_data: Optional[MapData] = None, + ): + self.origin = bbox.min_ + self.bbox = bbox + self.tiles = tiles + self.tile_size = tile_size + self.ppm = ppm + self.projection = projection + self.groups = groups + self.map_data = map_data + assert np.all(tiles[0, 0].bbox.min_ == self.origin) + for tile in tiles.values(): + assert bbox.contains(tile.bbox) + + @classmethod + def from_bbox( + cls, + projection: Projection, + bbox: BoundaryBox, + ppm: float, + path: Optional[Path] = None, + tile_size: int = 128, + ): + bbox_osm = projection.unproject(bbox) + if path is not None and path.is_file(): + print(OSMData.from_file) + osm = OSMData.from_file(path) + if osm.box is not None: + assert osm.box.contains(bbox_osm) + else: + osm = OSMData.from_dict(get_osm(bbox_osm, path)) + + 
osm.add_xy_to_nodes(projection) + map_data = MapData.from_osm(osm) + map_index = MapIndex(map_data) + + bounds_x, bounds_y = [ + np.r_[np.arange(min_, max_, tile_size), max_] + for min_, max_ in zip(bbox.min_, bbox.max_) + ] + bbox_tiles = {} + for i, xmin in enumerate(bounds_x[:-1]): + for j, ymin in enumerate(bounds_y[:-1]): + bbox_tiles[i, j] = BoundaryBox( + [xmin, ymin], [bounds_x[i + 1], bounds_y[j + 1]] + ) + + tiles = {} + for ij, bbox_tile in bbox_tiles.items(): + canvas = Canvas(bbox_tile, ppm) + nodes, lines, areas = map_index.query(bbox_tile) + masks = render_raster_masks(nodes, lines, areas, canvas) + canvas.raster = render_raster_map(masks) + tiles[ij] = canvas + + groups = {k: v for k, v in vars(Groups).items() if not k.startswith("__")} + + return cls(tiles, bbox, tile_size, ppm, projection, groups, map_data) + + def query(self, bbox: BoundaryBox) -> Canvas: + bbox = round_bbox(bbox, self.bbox.min_, self.ppm) + canvas = Canvas(bbox, self.ppm) + raster = np.zeros((3, canvas.h, canvas.w), np.uint8) + + bbox_all = bbox & self.bbox + ij_min = np.floor((bbox_all.min_ - self.origin) / self.tile_size).astype(int) + ij_max = np.ceil((bbox_all.max_ - self.origin) / self.tile_size).astype(int) - 1 + for i in range(ij_min[0], ij_max[0] + 1): + for j in range(ij_min[1], ij_max[1] + 1): + tile = self.tiles[i, j] + bbox_select = tile.bbox & bbox + slice_query = bbox_to_slice(bbox_select, canvas) + slice_tile = bbox_to_slice(bbox_select, tile) + raster[(slice(None),) + slice_query] = tile.raster[ + (slice(None),) + slice_tile + ] + canvas.raster = raster + return canvas + + def save(self, path: Path): + dump = { + "bbox": self.bbox.format(), + "tile_size": self.tile_size, + "ppm": self.ppm, + "groups": self.groups, + "tiles_bbox": {}, + "tiles_raster": {}, + } + if self.projection is not None: + dump["ref_latlonalt"] = self.projection.latlonalt + for ij, canvas in self.tiles.items(): + dump["tiles_bbox"][ij] = canvas.bbox.format() + raster_bytes = io.BytesIO() + raster = Image.fromarray(canvas.raster.transpose(1, 2, 0).astype(np.uint8)) + raster.save(raster_bytes, format="PNG") + dump["tiles_raster"][ij] = raster_bytes + with open(path, "wb") as fp: + pickle.dump(dump, fp) + + @classmethod + def load(cls, path: Path): + with path.open("rb") as fp: + dump = pickle.load(fp) + tiles = {} + for ij, bbox in dump["tiles_bbox"].items(): + tiles[ij] = Canvas(BoundaryBox.from_string(bbox), dump["ppm"]) + raster = np.asarray(Image.open(dump["tiles_raster"][ij])) + tiles[ij].raster = raster.transpose(2, 0, 1).copy() + projection = Projection(*dump["ref_latlonalt"]) + return cls( + tiles, + BoundaryBox.from_string(dump["bbox"]), + dump["tile_size"], + dump["ppm"], + projection, + dump["groups"], + ) diff --git a/osm/viz.py b/osm/viz.py new file mode 100644 index 0000000000000000000000000000000000000000..cd99e3eda0049c2aae35d397018db73b2eb661ae --- /dev/null +++ b/osm/viz.py @@ -0,0 +1,159 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. 
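A minimal usage sketch for the TileManager defined above in osm/tiling.py. The reference coordinates, cache path, and tile parameters are assumptions; from_bbox needs either a previously downloaded OSM extract covering the area (passed as path) or network access to the endpoint hard-coded in osm/download.py.

from pathlib import Path

import numpy as np

from osm.tiling import TileManager
from utils.geo import BoundaryBox, Projection

proj = Projection(48.8566, 2.3522)   # topocentric projection around a reference lat/lon (illustrative)
bbox = BoundaryBox(np.array([-128.0, -128.0]), np.array([128.0, 128.0]))  # 256 m x 256 m around the reference

# osm_cache.json is a hypothetical cached OSM extract; without it, the OSM API is queried.
tiles = TileManager.from_bbox(proj, bbox, ppm=2, path=Path("osm_cache.json"))

canvas = tiles.query(BoundaryBox(np.array([-64.0, -64.0]), np.array([64.0, 64.0])))
raster = canvas.raster               # (3, 256, 256) uint8: area, way, and node class indices

tiles.save(Path("tiles.pkl"))        # pickle with PNG-compressed tile rasters
tiles = TileManager.load(Path("tiles.pkl"))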
+ +import matplotlib as mpl +import matplotlib.pyplot as plt +import numpy as np +import plotly.graph_objects as go +import PIL.Image + +from utils.viz_2d import add_text +from .parser import Groups + + +class GeoPlotter: + def __init__(self, zoom=12, **kwargs): + self.fig = go.Figure() + self.fig.update_layout( + mapbox_style="open-street-map", + autosize=True, + mapbox_zoom=zoom, + margin={"r": 0, "t": 0, "l": 0, "b": 0}, + showlegend=True, + **kwargs, + ) + + def points(self, latlons, color, text=None, name=None, size=5, **kwargs): + latlons = np.asarray(latlons) + self.fig.add_trace( + go.Scattermapbox( + lat=latlons[..., 0], + lon=latlons[..., 1], + mode="markers", + text=text, + marker_color=color, + marker_size=size, + name=name, + **kwargs, + ) + ) + center = latlons.reshape(-1, 2).mean(0) + self.fig.update_layout( + mapbox_center=dict(zip(("lat", "lon"), center)), + ) + + def bbox(self, bbox, color, name=None, **kwargs): + corners = np.stack( + [bbox.min_, bbox.left_top, bbox.max_, bbox.right_bottom, bbox.min_] + ) + self.fig.add_trace( + go.Scattermapbox( + lat=corners[:, 0], + lon=corners[:, 1], + mode="lines", + marker_color=color, + name=name, + **kwargs, + ) + ) + self.fig.update_layout( + mapbox_center=dict(zip(("lat", "lon"), bbox.center)), + ) + + def raster(self, raster, bbox, below="traces", **kwargs): + if not np.issubdtype(raster.dtype, np.integer): + raster = (raster * 255).astype(np.uint8) + raster = PIL.Image.fromarray(raster) + corners = np.stack( + [ + bbox.min_, + bbox.left_top, + bbox.max_, + bbox.right_bottom, + ] + )[::-1, ::-1] + layers = [*self.fig.layout.mapbox.layers] + layers.append( + dict( + sourcetype="image", + source=raster, + coordinates=corners, + below=below, + **kwargs, + ) + ) + self.fig.layout.mapbox.layers = layers + + +map_colors = { + "building": (84, 155, 255), + "parking": (255, 229, 145), + "playground": (150, 133, 125), + "grass": (188, 255, 143), + "park": (0, 158, 16), + "forest": (0, 92, 9), + "water": (184, 213, 255), + "fence": (238, 0, 255), + "wall": (0, 0, 0), + "hedge": (107, 68, 48), + "kerb": (255, 234, 0), + "building_outline": (0, 0, 255), + "cycleway": (0, 251, 255), + "path": (8, 237, 0), + "road": (255, 0, 0), + "tree_row": (0, 92, 9), + "busway": (255, 128, 0), + "void": [int(255 * 0.9)] * 3, +} + + +class Colormap: + colors_areas = np.stack([map_colors[k] for k in ["void"] + Groups.areas]) + colors_ways = np.stack([map_colors[k] for k in ["void"] + Groups.ways]) + + @classmethod + def apply(cls, rasters): + return ( + np.where( + rasters[1, ..., None] > 0, + cls.colors_ways[rasters[1]], + cls.colors_areas[rasters[0]], + ) + / 255.0 + ) + + @classmethod + def add_colorbar(cls): + ax2 = plt.gcf().add_axes([1, 0.1, 0.02, 0.8]) + color_list = np.r_[cls.colors_areas[1:], cls.colors_ways[1:]] / 255.0 + cmap = mpl.colors.ListedColormap(color_list[::-1]) + ticks = np.linspace(0, 1, len(color_list), endpoint=False) + ticks += 1 / len(color_list) / 2 + cb = mpl.colorbar.ColorbarBase( + ax2, + cmap=cmap, + orientation="vertical", + ticks=ticks, + ) + cb.set_ticklabels((Groups.areas + Groups.ways)[::-1]) + ax2.tick_params(labelsize=15) + + +def plot_nodes(idx, raster, fontsize=8, size=15): + ax = plt.gcf().axes[idx] + ax.autoscale(enable=False) + nodes_xy = np.stack(np.where(raster > 0)[::-1], -1) + nodes_val = raster[tuple(nodes_xy.T[::-1])] - 1 + ax.scatter(*nodes_xy.T, c="k", s=size) + for xy, val in zip(nodes_xy, nodes_val): + group = Groups.nodes[val] + add_text( + idx, + group, + xy + 2, + lcolor=None, + fs=fontsize, + 
color="k", + normalized=False, + ha="center", + ) + plt.show() diff --git a/utils/exif.py b/utils/exif.py new file mode 100644 index 0000000000000000000000000000000000000000..c272019b5673dc1e3aab8a3e0e21b630cc629154 --- /dev/null +++ b/utils/exif.py @@ -0,0 +1,356 @@ +"""Copied from opensfm.exif to minimize hard dependencies.""" +from pathlib import Path +import json +import datetime +import logging +from codecs import encode, decode +from typing import Any, Dict, Optional, Tuple + +import exifread + +logger: logging.Logger = logging.getLogger(__name__) + +inch_in_mm = 25.4 +cm_in_mm = 10 +um_in_mm = 0.001 +default_projection = "perspective" +maximum_altitude = 1e4 + + +def sensor_data(): + with (Path(__file__).parent / "sensor_data.json").open() as fid: + data = json.load(fid) + return {k.lower(): v for k, v in data.items()} + + +def eval_frac(value) -> Optional[float]: + try: + return float(value.num) / float(value.den) + except ZeroDivisionError: + return None + + +def gps_to_decimal(values, reference) -> Optional[float]: + sign = 1 if reference in "NE" else -1 + degrees = eval_frac(values[0]) + minutes = eval_frac(values[1]) + seconds = eval_frac(values[2]) + if degrees is not None and minutes is not None and seconds is not None: + return sign * (degrees + minutes / 60 + seconds / 3600) + return None + + +def get_tag_as_float(tags, key, index: int = 0) -> Optional[float]: + if key in tags: + val = tags[key].values[index] + if isinstance(val, exifread.utils.Ratio): + ret_val = eval_frac(val) + if ret_val is None: + logger.error( + 'The rational "{2}" of tag "{0:s}" at index {1:d} c' + "aused a division by zero error".format(key, index, val) + ) + return ret_val + else: + return float(val) + else: + return None + + +def compute_focal( + focal_35: Optional[float], focal: Optional[float], sensor_width, sensor_string +) -> Tuple[float, float]: + if focal_35 is not None and focal_35 > 0: + focal_ratio = focal_35 / 36.0 # 35mm film produces 36x24mm pictures. 
+ else: + if not sensor_width: + sensor_width = sensor_data().get(sensor_string, None) + if sensor_width and focal: + focal_ratio = focal / sensor_width + focal_35 = 36.0 * focal_ratio + else: + focal_35 = 0.0 + focal_ratio = 0.0 + return focal_35, focal_ratio + + +def sensor_string(make: str, model: str) -> str: + if make != "unknown": + # remove duplicate 'make' information in 'model' + model = model.replace(make, "") + return (make.strip() + " " + model.strip()).strip().lower() + + +def unescape_string(s) -> str: + return decode(encode(s, "latin-1", "backslashreplace"), "unicode-escape") + + +class EXIF: + def __init__( + self, fileobj, image_size_loader, use_exif_size=True, name=None + ) -> None: + self.image_size_loader = image_size_loader + self.use_exif_size = use_exif_size + self.fileobj = fileobj + self.tags = exifread.process_file(fileobj, details=False) + fileobj.seek(0) + self.fileobj_name = self.fileobj.name if name is None else name + + def extract_image_size(self) -> Tuple[int, int]: + if ( + self.use_exif_size + and "EXIF ExifImageWidth" in self.tags + and "EXIF ExifImageLength" in self.tags + ): + width, height = ( + int(self.tags["EXIF ExifImageWidth"].values[0]), + int(self.tags["EXIF ExifImageLength"].values[0]), + ) + elif ( + self.use_exif_size + and "Image ImageWidth" in self.tags + and "Image ImageLength" in self.tags + ): + width, height = ( + int(self.tags["Image ImageWidth"].values[0]), + int(self.tags["Image ImageLength"].values[0]), + ) + else: + height, width = self.image_size_loader() + return width, height + + def _decode_make_model(self, value) -> str: + """Python 2/3 compatible decoding of make/model field.""" + if hasattr(value, "decode"): + try: + return value.decode("utf-8") + except UnicodeDecodeError: + return "unknown" + else: + return value + + def extract_make(self) -> str: + # Camera make and model + if "EXIF LensMake" in self.tags: + make = self.tags["EXIF LensMake"].values + elif "Image Make" in self.tags: + make = self.tags["Image Make"].values + else: + make = "unknown" + return self._decode_make_model(make) + + def extract_model(self) -> str: + if "EXIF LensModel" in self.tags: + model = self.tags["EXIF LensModel"].values + elif "Image Model" in self.tags: + model = self.tags["Image Model"].values + else: + model = "unknown" + return self._decode_make_model(model) + + def extract_focal(self) -> Tuple[float, float]: + make, model = self.extract_make(), self.extract_model() + focal_35, focal_ratio = compute_focal( + get_tag_as_float(self.tags, "EXIF FocalLengthIn35mmFilm"), + get_tag_as_float(self.tags, "EXIF FocalLength"), + self.extract_sensor_width(), + sensor_string(make, model), + ) + return focal_35, focal_ratio + + def extract_sensor_width(self) -> Optional[float]: + """Compute sensor with from width and resolution.""" + if ( + "EXIF FocalPlaneResolutionUnit" not in self.tags + or "EXIF FocalPlaneXResolution" not in self.tags + ): + return None + resolution_unit = self.tags["EXIF FocalPlaneResolutionUnit"].values[0] + mm_per_unit = self.get_mm_per_unit(resolution_unit) + if not mm_per_unit: + return None + pixels_per_unit = get_tag_as_float(self.tags, "EXIF FocalPlaneXResolution") + if pixels_per_unit is None: + return None + if pixels_per_unit <= 0.0: + pixels_per_unit = get_tag_as_float(self.tags, "EXIF FocalPlaneYResolution") + if pixels_per_unit is None or pixels_per_unit <= 0.0: + return None + units_per_pixel = 1 / pixels_per_unit + width_in_pixels = self.extract_image_size()[0] + return width_in_pixels * units_per_pixel * 
mm_per_unit + + def get_mm_per_unit(self, resolution_unit) -> Optional[float]: + """Length of a resolution unit in millimeters. + + Uses the values from the EXIF specs in + https://www.sno.phy.queensu.ca/~phil/exiftool/TagNames/EXIF.html + + Args: + resolution_unit: the resolution unit value given in the EXIF + """ + if resolution_unit == 2: # inch + return inch_in_mm + elif resolution_unit == 3: # cm + return cm_in_mm + elif resolution_unit == 4: # mm + return 1 + elif resolution_unit == 5: # um + return um_in_mm + else: + logger.warning( + "Unknown EXIF resolution unit value: {}".format(resolution_unit) + ) + return None + + def extract_orientation(self) -> int: + orientation = 1 + if "Image Orientation" in self.tags: + value = self.tags.get("Image Orientation").values[0] + if type(value) == int and value != 0: + orientation = value + return orientation + + def extract_ref_lon_lat(self) -> Tuple[str, str]: + if "GPS GPSLatitudeRef" in self.tags: + reflat = self.tags["GPS GPSLatitudeRef"].values + else: + reflat = "N" + if "GPS GPSLongitudeRef" in self.tags: + reflon = self.tags["GPS GPSLongitudeRef"].values + else: + reflon = "E" + return reflon, reflat + + def extract_lon_lat(self) -> Tuple[Optional[float], Optional[float]]: + if "GPS GPSLatitude" in self.tags: + reflon, reflat = self.extract_ref_lon_lat() + lat = gps_to_decimal(self.tags["GPS GPSLatitude"].values, reflat) + lon = gps_to_decimal(self.tags["GPS GPSLongitude"].values, reflon) + else: + lon, lat = None, None + return lon, lat + + def extract_altitude(self) -> Optional[float]: + if "GPS GPSAltitude" in self.tags: + alt_value = self.tags["GPS GPSAltitude"].values[0] + if isinstance(alt_value, exifread.utils.Ratio): + altitude = eval_frac(alt_value) + elif isinstance(alt_value, int): + altitude = float(alt_value) + else: + altitude = None + + # Check if GPSAltitudeRef is equal to 1, which means GPSAltitude should be negative, reference: http://www.exif.org/Exif2-2.PDF#page=53 + if ( + "GPS GPSAltitudeRef" in self.tags + and self.tags["GPS GPSAltitudeRef"].values[0] == 1 + and altitude is not None + ): + altitude = -altitude + else: + altitude = None + return altitude + + def extract_dop(self) -> Optional[float]: + if "GPS GPSDOP" in self.tags: + return eval_frac(self.tags["GPS GPSDOP"].values[0]) + return None + + def extract_geo(self) -> Dict[str, Any]: + altitude = self.extract_altitude() + dop = self.extract_dop() + lon, lat = self.extract_lon_lat() + d = {} + + if lon is not None and lat is not None: + d["latitude"] = lat + d["longitude"] = lon + if altitude is not None: + d["altitude"] = min([maximum_altitude, altitude]) + if dop is not None: + d["dop"] = dop + return d + + def extract_capture_time(self) -> float: + if ( + "GPS GPSDate" in self.tags + and "GPS GPSTimeStamp" in self.tags # Actually GPSDateStamp + ): + try: + hours_f = get_tag_as_float(self.tags, "GPS GPSTimeStamp", 0) + minutes_f = get_tag_as_float(self.tags, "GPS GPSTimeStamp", 1) + if hours_f is None or minutes_f is None: + raise TypeError + hours = int(hours_f) + minutes = int(minutes_f) + seconds = get_tag_as_float(self.tags, "GPS GPSTimeStamp", 2) + gps_timestamp_string = "{0:s} {1:02d}:{2:02d}:{3:02f}".format( + self.tags["GPS GPSDate"].values, hours, minutes, seconds + ) + return ( + datetime.datetime.strptime( + gps_timestamp_string, "%Y:%m:%d %H:%M:%S.%f" + ) + - datetime.datetime(1970, 1, 1) + ).total_seconds() + except (TypeError, ValueError): + logger.info( + 'The GPS time stamp in image file "{0:s}" is invalid. 
' + "Falling back to DateTime*".format(self.fileobj_name) + ) + + time_strings = [ + ("EXIF DateTimeOriginal", "EXIF SubSecTimeOriginal", "EXIF Tag 0x9011"), + ("EXIF DateTimeDigitized", "EXIF SubSecTimeDigitized", "EXIF Tag 0x9012"), + ("Image DateTime", "Image SubSecTime", "Image Tag 0x9010"), + ] + for datetime_tag, subsec_tag, offset_tag in time_strings: + if datetime_tag in self.tags: + date_time = self.tags[datetime_tag].values + if subsec_tag in self.tags: + subsec_time = self.tags[subsec_tag].values + else: + subsec_time = "0" + try: + s = "{0:s}.{1:s}".format(date_time, subsec_time) + d = datetime.datetime.strptime(s, "%Y:%m:%d %H:%M:%S.%f") + except ValueError: + logger.debug( + 'The "{1:s}" time stamp or "{2:s}" tag is invalid in ' + 'image file "{0:s}"'.format( + self.fileobj_name, datetime_tag, subsec_tag + ) + ) + continue + # Test for OffsetTimeOriginal | OffsetTimeDigitized | OffsetTime + if offset_tag in self.tags: + offset_time = self.tags[offset_tag].values + try: + d += datetime.timedelta( + hours=-int(offset_time[0:3]), minutes=int(offset_time[4:6]) + ) + except (TypeError, ValueError): + logger.debug( + 'The "{0:s}" time zone offset in image file "{1:s}"' + " is invalid".format(offset_tag, self.fileobj_name) + ) + logger.debug( + 'Naively assuming UTC on "{0:s}" in image file ' + '"{1:s}"'.format(datetime_tag, self.fileobj_name) + ) + else: + logger.debug( + "No GPS time stamp and no time zone offset in image " + 'file "{0:s}"'.format(self.fileobj_name) + ) + logger.debug( + 'Naively assuming UTC on "{0:s}" in image file "{1:s}"'.format( + datetime_tag, self.fileobj_name + ) + ) + return (d - datetime.datetime(1970, 1, 1)).total_seconds() + logger.info( + 'Image file "{0:s}" has no valid time stamp'.format(self.fileobj_name) + ) + return 0.0 diff --git a/utils/geo.py b/utils/geo.py new file mode 100644 index 0000000000000000000000000000000000000000..7f97e501f81092b57fc3ad043aa520779b44faeb --- /dev/null +++ b/utils/geo.py @@ -0,0 +1,130 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. 
+ +from typing import Union + +import numpy as np +import torch + +from .geo_opensfm import TopocentricConverter + + +class BoundaryBox: + def __init__(self, min_: np.ndarray, max_: np.ndarray): + self.min_ = np.asarray(min_) + self.max_ = np.asarray(max_) + assert np.all(self.min_ <= self.max_) + + @classmethod + def from_string(cls, string: str): + return cls(*np.split(np.array(string.split(","), float), 2)) + + @property + def left_top(self): + return np.stack([self.min_[..., 0], self.max_[..., 1]], -1) + + @property + def right_bottom(self) -> (np.ndarray, np.ndarray): + return np.stack([self.max_[..., 0], self.min_[..., 1]], -1) + + @property + def center(self) -> np.ndarray: + return (self.min_ + self.max_) / 2 + + @property + def size(self) -> np.ndarray: + return self.max_ - self.min_ + + def translate(self, t: float): + return self.__class__(self.min_ + t, self.max_ + t) + + def contains(self, xy: Union[np.ndarray, "BoundaryBox"]): + if isinstance(xy, self.__class__): + return self.contains(xy.min_) and self.contains(xy.max_) + return np.all((xy >= self.min_) & (xy <= self.max_), -1) + + def normalize(self, xy): + min_, max_ = self.min_, self.max_ + if isinstance(xy, torch.Tensor): + min_ = torch.from_numpy(min_).to(xy) + max_ = torch.from_numpy(max_).to(xy) + return (xy - min_) / (max_ - min_) + + def unnormalize(self, xy): + min_, max_ = self.min_, self.max_ + if isinstance(xy, torch.Tensor): + min_ = torch.from_numpy(min_).to(xy) + max_ = torch.from_numpy(max_).to(xy) + return xy * (max_ - min_) + min_ + + def format(self) -> str: + return ",".join(np.r_[self.min_, self.max_].astype(str)) + + def __add__(self, x): + if isinstance(x, (int, float)): + return self.__class__(self.min_ - x, self.max_ + x) + else: + raise TypeError(f"Cannot add {self.__class__.__name__} to {type(x)}.") + + def __and__(self, other): + return self.__class__( + np.maximum(self.min_, other.min_), np.minimum(self.max_, other.max_) + ) + + def __repr__(self): + return self.format() + + +class Projection: + def __init__(self, lat, lon, alt=0, max_extent=25e3): + # The approximation error is |L - radius * tan(L / radius)| + # and is around 13cm for L=25km. + self.latlonalt = (lat, lon, alt) + self.converter = TopocentricConverter(lat, lon, alt) + min_ = self.converter.to_lla(*(-max_extent,) * 2, 0)[:2] + max_ = self.converter.to_lla(*(max_extent,) * 2, 0)[:2] + self.bounds = BoundaryBox(min_, max_) + + @classmethod + def from_points(cls, all_latlon): + assert all_latlon.shape[-1] == 2 + all_latlon = all_latlon.reshape(-1, 2) + latlon_mid = (all_latlon.min(0) + all_latlon.max(0)) / 2 + return cls(*latlon_mid) + + def check_bbox(self, bbox: BoundaryBox): + if self.bounds is not None and not self.bounds.contains(bbox): + raise ValueError( + f"Bbox {bbox.format()} is not contained in " + f"projection with bounds {self.bounds.format()}." + ) + + def project(self, geo, return_z=False): + if isinstance(geo, BoundaryBox): + return BoundaryBox(*self.project(np.stack([geo.min_, geo.max_]))) + geo = np.asarray(geo) + assert geo.shape[-1] in (2, 3) + if self.bounds is not None: + if not np.all(self.bounds.contains(geo[..., :2])): + raise ValueError( + f"Points {geo} are out of the valid bounds " + f"{self.bounds.format()}." 
+ ) + lat, lon = geo[..., 0], geo[..., 1] + if geo.shape[-1] == 3: + alt = geo[..., -1] + else: + alt = np.zeros_like(lat) + x, y, z = self.converter.to_topocentric(lat, lon, alt) + return np.stack([x, y] + ([z] if return_z else []), -1) + + def unproject(self, xy, return_z=False): + if isinstance(xy, BoundaryBox): + return BoundaryBox(*self.unproject(np.stack([xy.min_, xy.max_]))) + xy = np.asarray(xy) + x, y = xy[..., 0], xy[..., 1] + if xy.shape[-1] == 3: + z = xy[..., -1] + else: + z = np.zeros_like(x) + lat, lon, alt = self.converter.to_lla(x, y, z) + return np.stack([lat, lon] + ([alt] if return_z else []), -1) diff --git a/utils/geo_opensfm.py b/utils/geo_opensfm.py new file mode 100644 index 0000000000000000000000000000000000000000..d42145236dd4c65a8764cbe37fa25c527814017e --- /dev/null +++ b/utils/geo_opensfm.py @@ -0,0 +1,180 @@ +"""Copied from opensfm.geo to minimize hard dependencies.""" +import numpy as np +from numpy import ndarray +from typing import Tuple + +WGS84_a = 6378137.0 +WGS84_b = 6356752.314245 + + +def ecef_from_lla(lat, lon, alt: float) -> Tuple[float, ...]: + """ + Compute ECEF XYZ from latitude, longitude and altitude. + + All using the WGS84 model. + Altitude is the distance to the WGS84 ellipsoid. + Check results here http://www.oc.nps.edu/oc2902w/coord/llhxyz.htm + + >>> lat, lon, alt = 10, 20, 30 + >>> x, y, z = ecef_from_lla(lat, lon, alt) + >>> np.allclose(lla_from_ecef(x,y,z), [lat, lon, alt]) + True + """ + a2 = WGS84_a**2 + b2 = WGS84_b**2 + lat = np.radians(lat) + lon = np.radians(lon) + L = 1.0 / np.sqrt(a2 * np.cos(lat) ** 2 + b2 * np.sin(lat) ** 2) + x = (a2 * L + alt) * np.cos(lat) * np.cos(lon) + y = (a2 * L + alt) * np.cos(lat) * np.sin(lon) + z = (b2 * L + alt) * np.sin(lat) + return x, y, z + + +def lla_from_ecef(x, y, z): + """ + Compute latitude, longitude and altitude from ECEF XYZ. + + All using the WGS84 model. + Altitude is the distance to the WGS84 ellipsoid. + """ + a = WGS84_a + b = WGS84_b + ea = np.sqrt((a**2 - b**2) / a**2) + eb = np.sqrt((a**2 - b**2) / b**2) + p = np.sqrt(x**2 + y**2) + theta = np.arctan2(z * a, p * b) + lon = np.arctan2(y, x) + lat = np.arctan2( + z + eb**2 * b * np.sin(theta) ** 3, p - ea**2 * a * np.cos(theta) ** 3 + ) + N = a / np.sqrt(1 - ea**2 * np.sin(lat) ** 2) + alt = p / np.cos(lat) - N + return np.degrees(lat), np.degrees(lon), alt + + +def ecef_from_topocentric_transform(lat, lon, alt: float) -> ndarray: + """ + Transformation from a topocentric frame at reference position to ECEF. + + The topocentric reference frame is a metric one with the origin + at the given (lat, lon, alt) position, with the X axis heading east, + the Y axis heading north and the Z axis vertical to the ellipsoid. + >>> a = ecef_from_topocentric_transform(30, 20, 10) + >>> b = ecef_from_topocentric_transform_finite_diff(30, 20, 10) + >>> np.allclose(a, b) + True + """ + x, y, z = ecef_from_lla(lat, lon, alt) + sa = np.sin(np.radians(lat)) + ca = np.cos(np.radians(lat)) + so = np.sin(np.radians(lon)) + co = np.cos(np.radians(lon)) + return np.array( + [ + [-so, -sa * co, ca * co, x], + [co, -sa * so, ca * so, y], + [0, ca, sa, z], + [0, 0, 0, 1], + ] + ) + + +def ecef_from_topocentric_transform_finite_diff(lat, lon, alt: float) -> ndarray: + """ + Transformation from a topocentric frame at reference position to ECEF. + + The topocentric reference frame is a metric one with the origin + at the given (lat, lon, alt) position, with the X axis heading east, + the Y axis heading north and the Z axis vertical to the ellipsoid. 
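+
+    A quick consistency check in the style of the doctest of
+    ecef_from_topocentric_transform: the translation column of the
+    returned matrix is the ECEF position of the reference point.
+
+    >>> T = ecef_from_topocentric_transform_finite_diff(30, 20, 10)
+    >>> np.allclose(T[:3, 3], ecef_from_lla(30, 20, 10))
+    True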
+ """ + eps = 1e-2 + x, y, z = ecef_from_lla(lat, lon, alt) + v1 = ( + ( + np.array(ecef_from_lla(lat, lon + eps, alt)) + - np.array(ecef_from_lla(lat, lon - eps, alt)) + ) + / 2 + / eps + ) + v2 = ( + ( + np.array(ecef_from_lla(lat + eps, lon, alt)) + - np.array(ecef_from_lla(lat - eps, lon, alt)) + ) + / 2 + / eps + ) + v3 = ( + ( + np.array(ecef_from_lla(lat, lon, alt + eps)) + - np.array(ecef_from_lla(lat, lon, alt - eps)) + ) + / 2 + / eps + ) + v1 /= np.linalg.norm(v1) + v2 /= np.linalg.norm(v2) + v3 /= np.linalg.norm(v3) + return np.array( + [ + [v1[0], v2[0], v3[0], x], + [v1[1], v2[1], v3[1], y], + [v1[2], v2[2], v3[2], z], + [0, 0, 0, 1], + ] + ) + + +def topocentric_from_lla(lat, lon, alt: float, reflat, reflon, refalt: float): + """ + Transform from lat, lon, alt to topocentric XYZ. + + >>> lat, lon, alt = -10, 20, 100 + >>> np.allclose(topocentric_from_lla(lat, lon, alt, lat, lon, alt), + ... [0,0,0]) + True + >>> x, y, z = topocentric_from_lla(lat, lon, alt, 0, 0, 0) + >>> np.allclose(lla_from_topocentric(x, y, z, 0, 0, 0), + ... [lat, lon, alt]) + True + """ + T = np.linalg.inv(ecef_from_topocentric_transform(reflat, reflon, refalt)) + x, y, z = ecef_from_lla(lat, lon, alt) + tx = T[0, 0] * x + T[0, 1] * y + T[0, 2] * z + T[0, 3] + ty = T[1, 0] * x + T[1, 1] * y + T[1, 2] * z + T[1, 3] + tz = T[2, 0] * x + T[2, 1] * y + T[2, 2] * z + T[2, 3] + return tx, ty, tz + + +def lla_from_topocentric(x, y, z, reflat, reflon, refalt: float): + """ + Transform from topocentric XYZ to lat, lon, alt. + """ + T = ecef_from_topocentric_transform(reflat, reflon, refalt) + ex = T[0, 0] * x + T[0, 1] * y + T[0, 2] * z + T[0, 3] + ey = T[1, 0] * x + T[1, 1] * y + T[1, 2] * z + T[1, 3] + ez = T[2, 0] * x + T[2, 1] * y + T[2, 2] * z + T[2, 3] + return lla_from_ecef(ex, ey, ez) + + +class TopocentricConverter(object): + """Convert to and from a topocentric reference frame.""" + + def __init__(self, reflat, reflon, refalt): + """Init the converter given the reference origin.""" + self.lat = reflat + self.lon = reflon + self.alt = refalt + + def to_topocentric(self, lat, lon, alt): + """Convert lat, lon, alt to topocentric x, y, z.""" + return topocentric_from_lla(lat, lon, alt, self.lat, self.lon, self.alt) + + def to_lla(self, x, y, z): + """Convert topocentric x, y, z to lat, lon, alt.""" + return lla_from_topocentric(x, y, z, self.lat, self.lon, self.alt) + + def __eq__(self, o): + return np.allclose([self.lat, self.lon, self.alt], (o.lat, o.lon, o.alt)) diff --git a/utils/geometry.py b/utils/geometry.py new file mode 100644 index 0000000000000000000000000000000000000000..9bcbcba7c41e689e9dd9e35fe33e7787fdd13b03 --- /dev/null +++ b/utils/geometry.py @@ -0,0 +1,68 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. + +import numpy as np +import torch + + +def from_homogeneous(points, eps: float = 1e-8): + """Remove the homogeneous dimension of N-dimensional points. + Args: + points: torch.Tensor or numpy.ndarray with size (..., N+1). + Returns: + A torch.Tensor or numpy ndarray with size (..., N). + """ + return points[..., :-1] / (points[..., -1:] + eps) + + +def to_homogeneous(points): + """Convert N-dimensional points to homogeneous coordinates. + Args: + points: torch.Tensor or numpy.ndarray with size (..., N). + Returns: + A torch.Tensor or numpy.ndarray with size (..., N+1). 
+ """ + if isinstance(points, torch.Tensor): + pad = points.new_ones(points.shape[:-1] + (1,)) + return torch.cat([points, pad], dim=-1) + elif isinstance(points, np.ndarray): + pad = np.ones((points.shape[:-1] + (1,)), dtype=points.dtype) + return np.concatenate([points, pad], axis=-1) + else: + raise ValueError + + +@torch.jit.script +def undistort_points(pts, dist): + dist = dist.unsqueeze(-2) # add point dimension + ndist = dist.shape[-1] + undist = pts + valid = torch.ones(pts.shape[:-1], device=pts.device, dtype=torch.bool) + if ndist > 0: + k1, k2 = dist[..., :2].split(1, -1) + r2 = torch.sum(pts**2, -1, keepdim=True) + radial = k1 * r2 + k2 * r2**2 + undist = undist + pts * radial + + # The distortion model is supposedly only valid within the image + # boundaries. Because of the negative radial distortion, points that + # are far outside of the boundaries might actually be mapped back + # within the image. To account for this, we discard points that are + # beyond the inflection point of the distortion model, + # e.g. such that d(r + k_1 r^3 + k2 r^5)/dr = 0 + limited = ((k2 > 0) & ((9 * k1**2 - 20 * k2) > 0)) | ((k2 <= 0) & (k1 > 0)) + limit = torch.abs( + torch.where( + k2 > 0, + (torch.sqrt(9 * k1**2 - 20 * k2) - 3 * k1) / (10 * k2), + 1 / (3 * k1), + ) + ) + valid = valid & torch.squeeze(~limited | (r2 < limit), -1) + + if ndist > 2: + p12 = dist[..., 2:] + p21 = p12.flip(-1) + uv = torch.prod(pts, -1, keepdim=True) + undist = undist + 2 * p12 * uv + p21 * (r2 + 2 * pts**2) + + return undist, valid diff --git a/utils/io.py b/utils/io.py new file mode 100644 index 0000000000000000000000000000000000000000..e092ef8695a41f2fae6da4e03d7cad74eab5cadf --- /dev/null +++ b/utils/io.py @@ -0,0 +1,61 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. 
+ +import json +import requests +import shutil +from pathlib import Path + +import cv2 +import numpy as np +import torch +from tqdm.auto import tqdm + +import logger + +DATA_URL = "https://cvg-data.inf.ethz.ch/OrienterNet_CVPR2023" + + +def read_image(path, grayscale=False): + if grayscale: + mode = cv2.IMREAD_GRAYSCALE + else: + mode = cv2.IMREAD_COLOR + image = cv2.imread(str(path), mode) + if image is None: + raise ValueError(f"Cannot read image {path}.") + if not grayscale and len(image.shape) == 3: + image = np.ascontiguousarray(image[:, :, ::-1]) # BGR to RGB + return image + + +def write_torch_image(path, image): + image_cv2 = np.round(image.clip(0, 1) * 255).astype(int)[..., ::-1] + cv2.imwrite(str(path), image_cv2) + + +class JSONEncoder(json.JSONEncoder): + def default(self, obj): + if isinstance(obj, (np.ndarray, torch.Tensor)): + return obj.tolist() + elif isinstance(obj, np.generic): + return obj.item() + return json.JSONEncoder.default(self, obj) + + +def write_json(path, data): + with open(path, "w") as f: + json.dump(data, f, cls=JSONEncoder) + + +def download_file(url, path): + path = Path(path) + if path.is_dir(): + path = path / Path(url).name + path.parent.mkdir(exist_ok=True, parents=True) + logger.info("Downloading %s to %s.", url, path) + with requests.get(url, stream=True) as r: + total_length = int(r.headers.get("Content-Length")) + with tqdm.wrapattr(r.raw, "read", total=total_length, desc="") as raw: + with open(path, "wb") as output: + shutil.copyfileobj(raw, output) + return path diff --git a/utils/sensor_data.json b/utils/sensor_data.json new file mode 100644 index 0000000000000000000000000000000000000000..ca6a23922096b17678929ecd0faa23ac43725b37 --- /dev/null +++ b/utils/sensor_data.json @@ -0,0 +1,3711 @@ +{ + "Acer CE-5330": 5.75, + "Acer CE-5430": 5.75, + "Acer CE-6430": 5.75, + "Acer CI-6330": 7.11, + "Acer CI-6530": 7.11, + "Acer CI-8330": 7.11, + "Acer CL-5300": 5.75, + "Acer CP-8531": 7.11, + "Acer CP-8660": 7.11, + "Acer CR-5130": 7.11, + "Acer CR-6530": 7.11, + "Acer CR-8530": 7.11, + "Acer CS-5530": 5.75, + "Acer CS-5531": 5.75, + "Acer CS-6530": 5.75, + "Acer CS-6531": 5.75, + "Acer CU-6530": 5.75, + "AeroVironment Quantix": 6.08, + "AgfaPhoto Compact 100": 5.75, + "AgfaPhoto Compact 102": 6.16, + "AgfaPhoto Compact 103": 6.08, + "AgfaPhoto DC-1030i": 7.11, + "AgfaPhoto DC-1033m": 5.75, + "AgfaPhoto DC-1033x": 7.11, + "AgfaPhoto DC-1338i": 7.11, + "AgfaPhoto DC-1338sT": 7.11, + "AgfaPhoto DC-2030m": 6.16, + "AgfaPhoto DC-302": 4.8, + "AgfaPhoto DC-500": 4.8, + "AgfaPhoto DC-530i": 5.75, + "AgfaPhoto DC-533": 5.75, + "AgfaPhoto DC-600uw": 6.16, + "AgfaPhoto DC-630": 5.75, + "AgfaPhoto DC-630i": 5.75, + "AgfaPhoto DC-630x": 5.75, + "AgfaPhoto DC-633x": 5.75, + "AgfaPhoto DC-633xs": 5.75, + "AgfaPhoto DC-730i": 5.75, + "AgfaPhoto DC-733i": 5.75, + "AgfaPhoto DC-733s": 5.75, + "AgfaPhoto DC-735": 5.75, + "AgfaPhoto DC-735i": 5.75, + "AgfaPhoto DC-738i": 5.75, + "AgfaPhoto DC-830": 5.75, + "AgfaPhoto DC-830i": 7.11, + "AgfaPhoto DC-8330i": 5.75, + "AgfaPhoto DC-8338i": 5.75, + "AgfaPhoto DC-833m": 5.75, + "AgfaPhoto DC-8428s": 7.11, + "AgfaPhoto Optima 1": 6.08, + "AgfaPhoto Optima 100": 6.08, + "AgfaPhoto Optima 102": 6.08, + "AgfaPhoto Optima 103": 6.16, + "AgfaPhoto Optima 104": 6.08, + "AgfaPhoto Optima 105": 6.16, + "AgfaPhoto Optima 1338mT": 6.16, + "AgfaPhoto Optima 1438m": 6.08, + "AgfaPhoto Optima 3": 6.16, + "AgfaPhoto Optima 830UW": 5.75, + "AgfaPhoto Optima 8328m": 5.75, + "AgfaPhoto ePhoto 1280": 6.16, + "AgfaPhoto ePhoto 1680": 6.16, + 
"AgfaPhoto ePhoto CL18": 6.16, + "AgfaPhoto ePhoto CL30": 6.16, + "AgfaPhoto ePhoto CL30 Clik!": 6.16, + "AgfaPhoto ePhoto CL45": 6.16, + "AgfaPhoto ePhoto CL50": 6.16, + "AgfaPhoto sensor 505-D": 5.75, + "AgfaPhoto sensor 505-X": 5.75, + "AgfaPhoto sensor 530s": 5.75, + "AgfaPhoto sensor 830s": 5.75, + "Apple iPhone 4S": 4.57, + "Apple iPhone6,1": 4.89, + "Apple iPad 3": 4.54, + "Apple iPhone 3,1": 4.74, + "Apple iPhone 4": 4.74, + "Apple iPhone 5": 4.57, + "Apple iPhone 5S": 4.89, + "Apple iPhone 6": 4.89, + "Apple iPhone 6 Plus": 4.89, + "Apple iPhone3": 4.74, + "Apple iPhone4": 4.57, + "Apple iPhone3,1": 4.74, + "Apple iPhone4,1": 4.57, + "Apple iPhone4S": 4.57, + "Apple iPhone5,1": 4.57, + "Apple iPhone5,2": 4.57, + "Apple iPhone5,3": 4.57, + "Apple iPhone5,4": 4.57, + "Apple iPhone6": 4.89, + "Apple iPhone6,2": 4.89, + "BenQ AC100": 6.16, + "BenQ AE100": 6.16, + "BenQ C1420": 6.08, + "BenQ DC 2300": 4.5, + "BenQ DC 2410": 5.33, + "BenQ DC 3400": 4.5, + "BenQ DC 3410": 4.5, + "BenQ DC 4330": 5.75, + "BenQ DC 4500": 5.75, + "BenQ DC 5330": 5.75, + "BenQ DC C1000": 7.11, + "BenQ DC C1020": 6.16, + "BenQ DC C1030 Eco": 6.16, + "BenQ DC C1035": 6.16, + "BenQ DC C1050": 7.53, + "BenQ DC C1060": 6.16, + "BenQ DC C1220": 6.16, + "BenQ DC C1230": 6.16, + "BenQ DC C1250": 6.16, + "BenQ DC C1255": 6.16, + "BenQ DC C1430": 6.16, + "BenQ DC C1450": 6.16, + "BenQ DC C1460": 6.08, + "BenQ DC C1480": 6.16, + "BenQ DC C30": 5.75, + "BenQ DC C35": 5.75, + "BenQ DC C40": 5.75, + "BenQ DC C420": 5.75, + "BenQ DC C50": 7.11, + "BenQ DC C500": 5.75, + "BenQ DC C51": 5.75, + "BenQ DC C510": 5.75, + "BenQ DC C520": 5.75, + "BenQ DC C530": 5.75, + "BenQ DC C540": 5.75, + "BenQ DC C60": 7.11, + "BenQ DC C610": 5.75, + "BenQ DC C62": 7.11, + "BenQ DC C630": 5.75, + "BenQ DC C640": 7.53, + "BenQ DC C740": 5.75, + "BenQ DC C740i": 5.75, + "BenQ DC C750": 5.75, + "BenQ DC C800": 7.11, + "BenQ DC C840": 5.75, + "BenQ DC C850": 5.75, + "BenQ DC E1000": 7.11, + "BenQ DC E1020": 6.16, + "BenQ DC E1030": 6.16, + "BenQ DC E1035": 6.16, + "BenQ DC E1050": 7.53, + "BenQ DC E1050t": 6.16, + "BenQ DC E1220": 6.16, + "BenQ DC E1230": 6.16, + "BenQ DC E1240": 6.16, + "BenQ DC E1250": 6.16, + "BenQ DC E1260": 6.16, + "BenQ DC E1280": 6.16, + "BenQ DC E1420": 6.16, + "BenQ DC E1430": 6.16, + "BenQ DC E1460": 6.16, + "BenQ DC E1465": 6.16, + "BenQ DC E30": 6.4, + "BenQ DC E300": 6.4, + "BenQ DC E310": 6.4, + "BenQ DC E40": 5.75, + "BenQ DC E41": 5.75, + "BenQ DC E43": 5.75, + "BenQ DC E510": 5.75, + "BenQ DC E520": 5.75, + "BenQ DC E520 plus": 5.75, + "BenQ DC E53": 5.75, + "BenQ DC E600": 5.75, + "BenQ DC E605": 5.75, + "BenQ DC E610": 5.75, + "BenQ DC E63 Plus": 5.75, + "BenQ DC E720": 5.75, + "BenQ DC E800": 5.75, + "BenQ DC E820": 5.75, + "BenQ DC L1020": 6.16, + "BenQ DC P1410": 6.16, + "BenQ DC P500": 5.75, + "BenQ DC P860": 7.11, + "BenQ DC S1430": 6.08, + "BenQ DC S30": 5.33, + "BenQ DC S40": 5.75, + "BenQ DC T1260": 6.16, + "BenQ DC T700": 5.75, + "BenQ DC T800": 6.03, + "BenQ DC T850": 5.75, + "BenQ DC W1220": 6.16, + "BenQ DC X600": 5.75, + "BenQ DC X710": 5.75, + "BenQ DC X720": 5.75, + "BenQ DC X725": 5.75, + "BenQ DC X735": 5.75, + "BenQ DC X800": 5.75, + "BenQ DC X835": 5.75, + "BenQ E1480": 6.16, + "BenQ G1": 6.16, + "BenQ GH200": 6.16, + "BenQ GH600": 6.16, + "BenQ GH700": 6.16, + "BenQ LM100": 6.16, + "BenQ S1410": 6.08, + "BenQ S1420": 6.16, + "BenQ T1460": 6.16, + "Canon Digital IXUS": 5.33, + "Canon Digital IXUS 100 IS": 6.16, + "Canon Digital IXUS 110 IS": 6.16, + "Canon Digital IXUS 200 IS": 6.16, + 
"Canon Digital IXUS 300": 5.33, + "Canon Digital IXUS 330": 5.33, + "Canon Digital IXUS 40": 5.75, + "Canon Digital IXUS 400": 7.11, + "Canon Digital IXUS 430": 7.11, + "Canon Digital IXUS 50": 5.75, + "Canon Digital IXUS 500": 7.11, + "Canon Digital IXUS 60": 5.75, + "Canon Digital IXUS 65": 5.75, + "Canon Digital IXUS 80 IS": 5.75, + "Canon Digital IXUS 800 IS": 5.75, + "Canon Digital IXUS 85 IS": 6.16, + "Canon Digital IXUS 850 IS": 5.75, + "Canon Digital IXUS 860 IS": 5.75, + "Canon Digital IXUS 870 IS": 6.16, + "Canon Digital IXUS 90 IS": 6.16, + "Canon Digital IXUS 900 Ti": 7.11, + "Canon Digital IXUS 95 IS": 6.16, + "Canon Digital IXUS 950 IS": 5.75, + "Canon Digital IXUS 960 IS": 7.53, + "Canon Digital IXUS 970 IS": 6.16, + "Canon Digital IXUS 980 IS": 7.53, + "Canon Digital IXUS 990 IS": 6.16, + "Canon Digital IXUS II": 5.33, + "Canon Digital IXUS IIs": 5.33, + "Canon Digital IXUS V": 5.33, + "Canon Digital IXUS i": 5.75, + "Canon Digital IXUS i Zoom": 5.75, + "Canon Digital IXUS i7": 5.75, + "Canon Digital IXUS v2": 5.33, + "Canon Digital IXUS v3": 5.33, + "Canon ELPH 135 / IXUS 145": 6.16, + "Canon ELPH 140 IS / IXUS 150": 6.16, + "Canon ELPH 150 IS / IXUS 155": 6.16, + "Canon EOS 1000D": 22.2, + "Canon EOS 10D": 22.7, + "Canon EOS 20D": 22.5, + "Canon EOS 20Da": 22.5, + "Canon EOS 300D": 22.7, + "Canon EOS 30D": 22.5, + "Canon EOS 350D": 22.2, + "Canon EOS 400D": 22.2, + "Canon EOS 40D": 22.2, + "Canon EOS 450D": 22.2, + "Canon EOS 500D": 22.3, + "Canon EOS 50D": 22.3, + "Canon EOS 5D": 35.8, + "Canon EOS 5D Mark II": 36.0, + "Canon EOS 5D Mark III": 36.0, + "Canon EOS 60D": 22.3, + "Canon EOS 60Da": 22.3, + "Canon EOS 6D": 35.8, + "Canon EOS 70D": 22.5, + "Canon EOS 7D": 22.3, + "Canon EOS 7D Mark II": 22.4, + "Canon EOS DIGITAL REBEL XSi": 22.2, + "Canon EOS D30": 22.7, + "Canon EOS D60": 22.7, + "Canon EOS M": 22.3, + "Canon EOS Rebel SL1 / 100D": 22.3, + "Canon EOS Rebel T2i / 550D": 22.3, + "Canon EOS Rebel T3 / 1100D": 22.2, + "Canon EOS Rebel T3i / 600D": 22.3, + "Canon EOS Rebel T4i / 650D": 22.3, + "Canon EOS Rebel T5 / 1200D": 22.3, + "Canon EOS Rebel T5i / 700D": 22.3, + "Canon EOS-1D": 28.7, + "Canon EOS-1D C": 36.0, + "Canon EOS-1D Mark II": 28.7, + "Canon EOS-1D Mark II N": 28.7, + "Canon EOS-1D Mark III": 28.7, + "Canon EOS-1D Mark IV": 27.9, + "Canon EOS-1D X": 36.0, + "Canon EOS-1Ds": 35.8, + "Canon EOS-1Ds Mark II": 36.0, + "Canon EOS-1Ds Mark III": 36.0, + "Canon IXUS 1000 HS": 6.16, + "Canon IXUS 105": 6.16, + "Canon IXUS 1100 HS": 6.16, + "Canon IXUS 115 HS": 6.16, + "Canon IXUS 125 HS": 6.16, + "Canon IXUS 130": 6.16, + "Canon IXUS 132": 6.16, + "Canon IXUS 210": 6.16, + "Canon IXUS 220 HS": 6.16, + "Canon IXUS 230 HS": 6.16, + "Canon IXUS 240 HS": 6.16, + "Canon IXUS 300 HS": 6.16, + "Canon IXUS 310 HS": 6.16, + "Canon IXUS 500 HS": 6.16, + "Canon IXUS 510 HS": 6.16, + "Canon PowerShot 350": 4.8, + "Canon PowerShot 600": 4.8, + "Canon PowerShot A1000 IS": 6.16, + "Canon PowerShot A1300": 6.16, + "Canon PowerShot A1400": 6.16, + "Canon PowerShot A2000 IS": 6.16, + "Canon PowerShot A2300": 6.16, + "Canon PowerShot A2400 IS": 6.16, + "Canon PowerShot A2500": 6.16, + "Canon PowerShot A2600": 6.16, + "Canon PowerShot A3400 IS": 6.16, + "Canon PowerShot A3500 IS": 6.16, + "Canon PowerShot A400": 4.5, + "Canon PowerShot A4000 IS": 6.16, + "Canon PowerShot A430": 4.8, + "Canon PowerShot A450": 4.8, + "Canon PowerShot A460": 4.8, + "Canon PowerShot A470": 5.75, + "Canon PowerShot A480": 6.16, + "Canon PowerShot A490": 6.16, + "Canon PowerShot A495": 6.16, + "Canon 
PowerShot A5": 4.8, + "Canon PowerShot A5 Zoom": 4.8, + "Canon PowerShot A50": 4.8, + "Canon PowerShot A530": 5.75, + "Canon PowerShot A550": 5.75, + "Canon PowerShot A560": 5.75, + "Canon PowerShot A570 IS": 5.75, + "Canon PowerShot A580": 5.75, + "Canon PowerShot A590 IS": 5.75, + "Canon PowerShot A620": 7.11, + "Canon PowerShot A630": 7.11, + "Canon PowerShot A640": 7.11, + "Canon PowerShot A650 IS": 7.53, + "Canon PowerShot A700": 5.75, + "Canon PowerShot A710 IS": 5.75, + "Canon PowerShot A720 IS": 5.75, + "Canon PowerShot A800": 6.16, + "Canon PowerShot A810": 6.16, + "Canon PowerShot A85": 5.33, + "Canon PowerShot A95": 7.11, + "Canon PowerShot D10": 6.16, + "Canon PowerShot D20": 6.16, + "Canon PowerShot D30": 6.16, + "Canon PowerShot E1": 6.16, + "Canon PowerShot ELPH 100 HS": 6.16, + "Canon PowerShot ELPH 110 HS": 6.16, + "Canon PowerShot ELPH 115 IS": 6.16, + "Canon PowerShot ELPH 130 IS": 6.16, + "Canon PowerShot ELPH 300 HS": 6.16, + "Canon PowerShot ELPH 310 HS": 6.16, + "Canon PowerShot ELPH 320 HS": 6.16, + "Canon PowerShot ELPH 330 HS": 6.16, + "Canon PowerShot ELPH 340 HS": 6.16, + "Canon PowerShot ELPH 500 HS": 6.16, + "Canon PowerShot ELPH 510 HS": 6.16, + "Canon PowerShot ELPH 520 HS": 6.16, + "Canon PowerShot Elph 530 HS": 6.16, + "Canon PowerShot G1 X": 18.7, + "Canon PowerShot G1 X Mark II": 18.7, + "Canon PowerShot G10": 7.53, + "Canon PowerShot G11": 7.53, + "Canon PowerShot G12": 7.53, + "Canon PowerShot G15": 7.53, + "Canon PowerShot G16": 7.53, + "Canon PowerShot G6": 7.11, + "Canon PowerShot G7": 7.11, + "Canon PowerShot G7 X": 13.2, + "Canon PowerShot G9": 7.53, + "Canon PowerShot G9 X": 13.2, + "Canon PowerShot N": 6.16, + "Canon PowerShot N100": 7.53, + "Canon PowerShot N2": 6.16, + "Canon PowerShot Pro1": 8.8, + "Canon PowerShot Pro70": 6.4, + "Canon PowerShot Pro90 IS": 7.11, + "Canon PowerShot S100": 7.53, + "Canon PowerShot S100 Digital IXUS": 5.33, + "Canon PowerShot S110": 7.53, + "Canon PowerShot S120": 7.53, + "Canon PowerShot S200": 5.33, + "Canon PowerShot S230": 5.33, + "Canon PowerShot S3 IS": 5.75, + "Canon PowerShot S300": 5.33, + "Canon PowerShot S330": 5.33, + "Canon PowerShot S400": 7.11, + "Canon PowerShot S410": 7.11, + "Canon PowerShot S5 IS": 5.75, + "Canon PowerShot S500": 7.11, + "Canon PowerShot S60": 7.11, + "Canon PowerShot S70": 7.11, + "Canon PowerShot S90": 7.53, + "Canon PowerShot S95": 7.53, + "Canon PowerShot SD10": 5.75, + "Canon PowerShot SD100": 5.33, + "Canon PowerShot SD1000": 5.75, + "Canon PowerShot SD110": 5.33, + "Canon PowerShot SD1100 IS": 5.75, + "Canon PowerShot SD1200 IS": 6.16, + "Canon PowerShot SD1300 IS": 6.16, + "Canon PowerShot SD1400 IS": 6.16, + "Canon PowerShot SD20": 5.75, + "Canon PowerShot SD200": 5.75, + "Canon PowerShot SD30": 5.75, + "Canon PowerShot SD300": 5.75, + "Canon PowerShot SD3500 IS": 6.16, + "Canon PowerShot SD40": 5.75, + "Canon PowerShot SD400": 5.75, + "Canon PowerShot SD4000 IS": 6.16, + "Canon PowerShot SD430 Wireless": 5.75, + "Canon PowerShot SD450": 5.75, + "Canon PowerShot SD4500 IS": 6.16, + "Canon PowerShot SD500": 7.11, + "Canon PowerShot SD550": 7.11, + "Canon PowerShot SD600": 5.75, + "Canon PowerShot SD630": 5.75, + "Canon PowerShot SD700 IS": 5.75, + "Canon PowerShot SD750": 5.75, + "Canon PowerShot SD770 IS": 6.16, + "Canon PowerShot SD780 IS": 6.16, + "Canon PowerShot SD790 IS": 6.16, + "Canon PowerShot SD800 IS": 5.75, + "Canon PowerShot SD850 IS": 5.75, + "Canon PowerShot SD870 IS": 5.75, + "Canon PowerShot SD880 IS": 6.16, + "Canon PowerShot SD890 IS": 6.16, + 
"Canon PowerShot SD900": 7.11, + "Canon PowerShot SD940 IS": 6.16, + "Canon PowerShot SD950 IS": 7.53, + "Canon PowerShot SD960 IS": 6.16, + "Canon PowerShot SD970 IS": 6.16, + "Canon PowerShot SD980 IS": 6.16, + "Canon PowerShot SD990 IS": 7.53, + "Canon PowerShot SX1 IS": 6.16, + "Canon PowerShot SX10 IS": 6.16, + "Canon PowerShot SX100 IS": 5.75, + "Canon PowerShot SX110 IS": 6.16, + "Canon PowerShot SX120 IS": 5.75, + "Canon PowerShot SX130 IS": 6.16, + "Canon PowerShot SX150 IS": 6.16, + "Canon PowerShot SX160 IS": 6.16, + "Canon PowerShot SX170 IS": 6.16, + "Canon PowerShot SX20 IS": 6.16, + "Canon PowerShot SX200 IS": 6.16, + "Canon PowerShot SX210 IS": 6.16, + "Canon PowerShot SX220 HS": 6.16, + "Canon PowerShot SX230 HS": 6.16, + "Canon PowerShot SX240 HS": 6.16, + "Canon PowerShot SX260 HS": 6.16, + "Canon PowerShot SX270 HS": 6.16, + "Canon PowerShot SX280 HS": 6.16, + "Canon PowerShot SX30 IS": 6.16, + "Canon PowerShot SX40 HS": 6.16, + "Canon PowerShot SX400 IS": 6.16, + "Canon PowerShot SX50 HS": 6.16, + "Canon PowerShot SX500 IS": 6.16, + "Canon PowerShot SX510 HS": 6.16, + "Canon PowerShot SX520 HS": 6.16, + "Canon PowerShot SX60 HS": 6.16, + "Canon PowerShot SX600 HS": 6.16, + "Canon PowerShot SX700 HS": 6.16, + "Canon PowerShot TX1": 5.75, + "Canon Powershot A10": 5.33, + "Canon Powershot A100": 4.5, + "Canon Powershot A1100 IS": 6.16, + "Canon Powershot A1200": 6.16, + "Canon Powershot A20": 5.33, + "Canon Powershot A200": 4.5, + "Canon Powershot A2100 IS": 6.16, + "Canon Powershot A2200": 6.16, + "Canon Powershot A30": 5.33, + "Canon Powershot A300": 5.33, + "Canon Powershot A3000 IS": 6.16, + "Canon Powershot A310": 5.33, + "Canon Powershot A3100 IS": 6.16, + "Canon Powershot A3200 IS": 6.16, + "Canon Powershot A3300 IS": 6.16, + "Canon Powershot A40": 5.33, + "Canon Powershot A410": 4.5, + "Canon Powershot A420": 4.8, + "Canon Powershot A510": 5.33, + "Canon Powershot A520": 5.75, + "Canon Powershot A540": 5.75, + "Canon Powershot A60": 5.33, + "Canon Powershot A610": 7.11, + "Canon Powershot A70": 5.33, + "Canon Powershot A75": 5.33, + "Canon Powershot A80": 7.11, + "Canon Powershot G1": 7.11, + "Canon Powershot G2": 7.11, + "Canon Powershot G3": 7.11, + "Canon Powershot G5": 7.11, + "Canon Powershot S1 IS": 5.33, + "Canon Powershot S10": 6.4, + "Canon Powershot S2 IS": 5.75, + "Canon Powershot S20": 7.11, + "Canon Powershot S30": 7.11, + "Canon Powershot S40": 7.11, + "Canon Powershot S45": 7.11, + "Canon Powershot S50": 7.11, + "Canon Powershot S80": 7.11, + "Canon Pro90 IS": 7.11, + "Canon S200": 7.53, + "Canon SX220 HS": 6.16, + "Casio EX-FR10": 6.16, + "Casio EX-N1": 6.16, + "Casio EX-N10": 6.16, + "Casio EX-N20": 6.16, + "Casio EX-N5": 6.16, + "Casio EX-N50": 6.16, + "Casio EX-TR10": 6.16, + "Casio EX-TR15": 6.16, + "Casio EX-ZR400": 6.16, + "Casio EX-ZR700": 6.16, + "Casio EX-ZR800": 6.16, + "Casio EX-ZS30": 6.16, + "Casio EXILIM EX-JE10": 6.16, + "Casio EXILIM EX-FC100": 6.16, + "Casio EXILIM EX-FC150": 6.16, + "Casio EXILIM EX-FC160s": 6.16, + "Casio EXILIM EX-FH100": 6.16, + "Casio EXILIM EX-FH150": 6.16, + "Casio EXILIM EX-FH20": 6.16, + "Casio EXILIM EX-FH25": 6.16, + "Casio EXILIM EX-FS10": 6.16, + "Casio EXILIM EX-G1": 6.16, + "Casio EXILIM EX-H10": 6.16, + "Casio EXILIM EX-H15": 6.16, + "Casio EXILIM EX-H20G": 6.16, + "Casio EXILIM EX-H30": 6.16, + "Casio EXILIM EX-H5": 6.16, + "Casio EXILIM EX-H50": 6.16, + "Casio EXILIM EX-M1": 5.33, + "Casio EXILIM EX-M2": 7.11, + "Casio EXILIM EX-M20": 5.33, + "Casio EXILIM EX-S1": 5.33, + "Casio EXILIM EX-S10": 
6.16, + "Casio EXILIM EX-S12": 6.16, + "Casio EXILIM EX-S2": 7.11, + "Casio EXILIM EX-S20": 5.33, + "Casio EXILIM EX-S200": 6.16, + "Casio EXILIM EX-S3": 7.11, + "Casio EXILIM EX-S5": 6.16, + "Casio EXILIM EX-S600D": 5.75, + "Casio EXILIM EX-S7": 6.16, + "Casio EXILIM EX-S770": 5.75, + "Casio EXILIM EX-S770D": 5.75, + "Casio EXILIM EX-S8": 6.08, + "Casio EXILIM EX-S880": 5.75, + "Casio EXILIM EX-TR100": 6.16, + "Casio EXILIM EX-TR150": 6.16, + "Casio EXILIM EX-V7": 5.75, + "Casio EXILIM EX-V8": 5.75, + "Casio EXILIM EX-Z1": 6.16, + "Casio EXILIM EX-Z10": 5.75, + "Casio EXILIM EX-Z100": 6.16, + "Casio EXILIM EX-Z1000": 7.11, + "Casio EXILIM EX-Z1050": 7.31, + "Casio EXILIM EX-Z1080": 7.31, + "Casio EXILIM EX-Z110": 5.75, + "Casio EXILIM EX-Z120": 7.11, + "Casio EXILIM EX-Z150": 5.75, + "Casio EXILIM EX-Z16": 6.16, + "Casio EXILIM EX-Z19": 5.75, + "Casio EXILIM EX-Z2": 6.16, + "Casio EXILIM EX-Z20": 5.75, + "Casio EXILIM EX-Z200": 6.16, + "Casio EXILIM EX-Z2000": 6.16, + "Casio EXILIM EX-Z2300": 6.16, + "Casio EXILIM EX-Z25": 6.16, + "Casio EXILIM EX-Z250": 5.75, + "Casio EXILIM EX-Z270": 5.75, + "Casio EXILIM EX-Z280": 5.75, + "Casio EXILIM EX-Z29": 5.75, + "Casio EXILIM EX-Z3": 5.75, + "Casio EXILIM EX-Z30": 5.75, + "Casio EXILIM EX-Z300": 6.16, + "Casio EXILIM EX-Z3000": 6.16, + "Casio EXILIM EX-Z33": 6.16, + "Casio EXILIM EX-Z330": 6.16, + "Casio EXILIM EX-Z35": 6.16, + "Casio EXILIM EX-Z350": 6.16, + "Casio EXILIM EX-Z4": 5.75, + "Casio EXILIM EX-Z40": 5.75, + "Casio EXILIM EX-Z400": 6.16, + "Casio EXILIM EX-Z450": 6.16, + "Casio EXILIM EX-Z5": 5.75, + "Casio EXILIM EX-Z50": 5.75, + "Casio EXILIM EX-Z55": 5.75, + "Casio EXILIM EX-Z550": 6.16, + "Casio EXILIM EX-Z6": 5.75, + "Casio EXILIM EX-Z60": 7.11, + "Casio EXILIM EX-Z600": 5.75, + "Casio EXILIM EX-Z65": 5.75, + "Casio EXILIM EX-Z7": 5.75, + "Casio EXILIM EX-Z70": 5.75, + "Casio EXILIM EX-Z700": 5.75, + "Casio EXILIM EX-Z75": 5.75, + "Casio EXILIM EX-Z77": 5.75, + "Casio EXILIM EX-Z8": 5.75, + "Casio EXILIM EX-Z80": 5.75, + "Casio EXILIM EX-Z800": 6.16, + "Casio EXILIM EX-Z85": 5.75, + "Casio EXILIM EX-Z850": 7.11, + "Casio EXILIM EX-Z9": 5.75, + "Casio EXILIM EX-Z90": 6.16, + "Casio EXILIM EX-ZR10": 6.16, + "Casio EXILIM EX-ZR100": 6.16, + "Casio EXILIM EX-ZR15": 6.16, + "Casio EXILIM EX-ZR20": 6.16, + "Casio EXILIM EX-ZR200": 6.16, + "Casio EXILIM EX-ZR300": 6.16, + "Casio EXILIM EX-ZS10": 6.16, + "Casio EXILIM EX-ZS100": 6.16, + "Casio EXILIM EX-ZS12": 6.16, + "Casio EXILIM EX-ZS15": 6.16, + "Casio EXILIM EX-ZS150": 6.16, + "Casio EXILIM EX-ZS20": 6.16, + "Casio EXILIM EX-ZS6": 6.16, + "Casio EXILIM Pro EX-F1": 7.11, + "Casio EXILIM QV-R100": 6.16, + "Casio Exilim EX-10": 7.53, + "Casio Exilim EX-100": 7.53, + "Casio Exilim EX-P505": 5.75, + "Casio Exilim EX-P600": 7.11, + "Casio Exilim EX-P700": 7.11, + "Casio Exilim EX-S100": 4.5, + "Casio Exilim EX-S500": 5.75, + "Casio Exilim EX-S600": 5.75, + "Casio Exilim EX-Z1200 SR": 7.53, + "Casio Exilim EX-Z500": 5.75, + "Casio Exilim EX-Z57": 5.75, + "Casio Exilim EX-Z750": 7.11, + "Casio Exilim EX-ZR1000": 6.16, + "Casio Exilim EX-ZR1100": 6.16, + "Casio Exilim EX-ZS5": 6.16, + "Casio Exilim TRYX": 6.16, + "Casio GV-10": 4.5, + "Casio GV-20": 4.5, + "Casio QV-2000UX": 6.4, + "Casio QV-2100": 5.33, + "Casio QV-2300UX": 5.33, + "Casio QV-2400UX": 5.33, + "Casio QV-2800UX": 5.33, + "Casio QV-2900UX": 5.33, + "Casio QV-300": 4.8, + "Casio QV-3000EX": 7.11, + "Casio QV-3500EX": 7.11, + "Casio QV-3EX / XV-3": 7.11, + "Casio QV-4000": 7.11, + "Casio QV-5000SX": 4.8, + "Casio QV-5500SX": 4.8, 
+ "Casio QV-5700": 7.11, + "Casio QV-700": 4.8, + "Casio QV-7000SX": 4.8, + "Casio QV-770": 4.8, + "Casio QV-8000SX": 4.8, + "Casio QV-R3": 7.11, + "Casio QV-R4": 7.11, + "Casio QV-R40": 7.11, + "Casio QV-R41": 7.11, + "Casio QV-R51": 7.11, + "Casio QV-R52": 7.11, + "Casio QV-R61": 7.11, + "Casio QV-R62": 7.11, + "Concord 00.": 6.4, + "Concord 2.": 6.4, + "Concord 3345z": 6.4, + "Concord 3346z": 6.4, + "Concord 40.": 7.11, + "Concord 42.": 5.75, + "Concord 43.": 6.4, + "Concord 4340z": 5.75, + "Concord 45.": 6.4, + "Concord 46.": 6.4, + "Concord 47.": 6.4, + "Concord 5340z": 5.75, + "Concord 5345z": 7.11, + "Concord 6340z": 7.11, + "Concord DVx": 6.4, + "Concord ES500z": 5.75, + "Concord ES510z": 7.11, + "Concord Eye-Q 1000": 6.4, + "Concord Eye-Q 1300": 6.4, + "Concord Eye-Q 2040": 6.4, + "Concord Eye-Q 2133z": 6.4, + "Concord Eye-Q 3040AF": 6.4, + "Concord Eye-Q 3103": 6.4, + "Concord Eye-Q 3120 AF": 4.0, + "Concord Eye-Q 3132z": 6.4, + "Concord Eye-Q 3340z": 5.33, + "Concord Eye-Q 3341z": 7.11, + "Concord Eye-Q 3343z": 5.33, + "Concord Eye-Q 4060AF": 7.11, + "Concord Eye-Q 4330z": 7.11, + "Concord Eye-Q 4342z": 7.11, + "Concord Eye-Q 4360z": 7.11, + "Concord Eye-Q 4363z": 7.11, + "Concord Eye-Q 5062AF": 7.11, + "Concord Eye-Q 5330z": 7.11, + "Concord Eye-Q Duo 2000": 7.11, + "Concord Eye-Q Duo LCD": 6.4, + "Concord Eye-Q Go 2000": 7.11, + "Concord Eye-Q Go LCD": 6.4, + "Concord Eye-Q Go Wireless": 7.11, + "Contax N Digital": 36.0, + "Contax SL300R T": 5.33, + "Contax TVS Digital": 7.11, + "Contax U4R": 5.33, + "Contax i4R": 5.33, + "DJI FC300S": 6.16, + "DJI FC300C": 6.31, + "DJI FC300X": 6.2, + "DJI FC350": 6.17, + "DJI FC350Z": 6.17, + "DJI FC330": 6.25, + "DJI FC220": 6.17, + "DJI FC2103": 6.07, + "DJI FC6310": 13.2, + "DJI FC6540": 23.5, + "Epson L-500V": 5.75, + "Epson PhotoPC 3000 Zoom": 7.11, + "Epson PhotoPC 3100 Zoom": 7.11, + "Epson PhotoPC 500": 4.8, + "Epson PhotoPC 550": 4.8, + "Epson PhotoPC 600": 4.8, + "Epson PhotoPC 650": 4.8, + "Epson PhotoPC 700": 4.8, + "Epson PhotoPC 750 Zoom": 6.4, + "Epson PhotoPC 800": 6.4, + "Epson PhotoPC 850 Zoom": 6.4, + "Epson PhotoPC L-200": 5.75, + "Epson PhotoPC L-300": 5.75, + "Epson PhotoPC L-400": 5.75, + "Epson PhotoPC L-410": 5.75, + "Epson PhotoPC L-500V": 5.75, + "Epson R-D1": 23.7, + "Epson R-D1xG": 23.7, + "Epson RD-1s": 23.7, + "Fujifilm A850": 5.75, + "Fujifilm Bigjob HD-3W": 5.75, + "Fujifilm Bigjob HD1": 5.33, + "Fujifilm DS-260HD": 6.4, + "Fujifilm DS-300": 8.8, + "Fujifilm Digital Q1": 6.4, + "Fujifilm FinePix 1300": 5.33, + "Fujifilm FinePix 1400z": 5.33, + "Fujifilm FinePix 2300": 5.33, + "Fujifilm FinePix 2400 Zoom": 5.33, + "Fujifilm FinePix 2600 Zoom": 5.33, + "Fujifilm FinePix 2650": 5.33, + "Fujifilm FinePix 2800 Zoom": 5.33, + "Fujifilm FinePix 3800": 5.33, + "Fujifilm FinePix 40i": 5.33, + "Fujifilm FinePix 4700 Zoom": 7.53, + "Fujifilm FinePix 4800 Zoom": 7.53, + "Fujifilm FinePix 4900 Zoom": 7.53, + "Fujifilm FinePix 50i": 7.53, + "Fujifilm FinePix 6800 Zoom": 7.53, + "Fujifilm FinePix 6900 Zoom": 7.53, + "Fujifilm FinePix A100": 6.16, + "Fujifilm FinePix A101": 5.33, + "Fujifilm FinePix A120": 5.33, + "Fujifilm FinePix A150": 6.16, + "Fujifilm FinePix A160": 6.16, + "Fujifilm FinePix A170": 6.16, + "Fujifilm FinePix A175": 6.16, + "Fujifilm FinePix A180": 6.16, + "Fujifilm FinePix A200": 5.33, + "Fujifilm FinePix A201": 5.33, + "Fujifilm FinePix A202": 5.33, + "Fujifilm FinePix A203": 5.33, + "Fujifilm FinePix A204": 5.33, + "Fujifilm FinePix A205 Zoom": 5.33, + "Fujifilm FinePix A210 Zoom": 5.33, + "Fujifilm 
FinePix A220": 6.16, + "Fujifilm FinePix A225": 6.16, + "Fujifilm FinePix A235": 6.16, + "Fujifilm FinePix A303": 5.33, + "Fujifilm FinePix A310 Zoom": 5.33, + "Fujifilm FinePix A330": 5.33, + "Fujifilm FinePix A340": 5.33, + "Fujifilm FinePix A345 Zoom": 5.75, + "Fujifilm FinePix A350 Zoom": 5.75, + "Fujifilm FinePix A400 Zoom": 5.75, + "Fujifilm FinePix A500 Zoom": 5.75, + "Fujifilm FinePix A510": 5.75, + "Fujifilm FinePix A600 Zoom": 7.53, + "Fujifilm FinePix A610": 5.75, + "Fujifilm FinePix A700": 8.0, + "Fujifilm FinePix A800": 8.0, + "Fujifilm FinePix A820": 8.0, + "Fujifilm FinePix A825": 8.0, + "Fujifilm FinePix A900": 8.0, + "Fujifilm FinePix A920": 8.0, + "Fujifilm FinePix AV100": 6.16, + "Fujifilm FinePix AV105": 6.16, + "Fujifilm FinePix AV110": 6.16, + "Fujifilm FinePix AV130": 6.16, + "Fujifilm FinePix AV140": 6.16, + "Fujifilm FinePix AV150": 6.16, + "Fujifilm FinePix AV180": 6.16, + "Fujifilm FinePix AV200": 6.16, + "Fujifilm FinePix AV205": 6.16, + "Fujifilm FinePix AV250": 6.16, + "Fujifilm FinePix AV255": 6.16, + "Fujifilm FinePix AX200": 6.16, + "Fujifilm FinePix AX205": 6.16, + "Fujifilm FinePix AX230": 6.16, + "Fujifilm FinePix AX245w": 6.16, + "Fujifilm FinePix AX250": 6.16, + "Fujifilm FinePix AX280": 6.16, + "Fujifilm FinePix AX300": 6.16, + "Fujifilm FinePix AX305": 6.16, + "Fujifilm FinePix AX350": 6.16, + "Fujifilm FinePix AX355": 6.16, + "Fujifilm FinePix AX500": 6.16, + "Fujifilm FinePix AX550": 6.16, + "Fujifilm FinePix AX650": 6.16, + "Fujifilm FinePix E500 Zoom": 5.75, + "Fujifilm FinePix E510 Zoom": 5.75, + "Fujifilm FinePix E550 Zoom": 7.53, + "Fujifilm FinePix E900 Zoom": 8.0, + "Fujifilm FinePix EX-20": 5.33, + "Fujifilm FinePix F10 Zoom": 7.53, + "Fujifilm FinePix F100fd": 8.0, + "Fujifilm FinePix F11 Zoom": 7.53, + "Fujifilm FinePix F20 Zoom": 7.53, + "Fujifilm FinePix F200EXR": 8.0, + "Fujifilm FinePix F30 Zoom": 7.53, + "Fujifilm FinePix F300EXR": 6.4, + "Fujifilm FinePix F305EXR": 6.4, + "Fujifilm FinePix F31fd": 7.53, + "Fujifilm FinePix F401 Zoom": 5.33, + "Fujifilm FinePix F402": 5.33, + "Fujifilm FinePix F40fd": 8.0, + "Fujifilm FinePix F410 Zoom": 5.33, + "Fujifilm FinePix F420 Zoom": 5.33, + "Fujifilm FinePix F440 Zoom": 5.75, + "Fujifilm FinePix F450 Zoom": 5.75, + "Fujifilm FinePix F455 Zoom": 5.75, + "Fujifilm FinePix F45fd": 8.0, + "Fujifilm FinePix F460": 5.75, + "Fujifilm FinePix F470 Zoom": 5.75, + "Fujifilm FinePix F47fd": 8.0, + "Fujifilm FinePix F480 Zoom": 5.75, + "Fujifilm FinePix F500 EXR": 6.4, + "Fujifilm FinePix F50fd": 8.0, + "Fujifilm FinePix F550 EXR": 6.4, + "Fujifilm FinePix F600 EXR": 6.4, + "Fujifilm FinePix F601 Zoom": 7.53, + "Fujifilm FinePix F605EXR": 6.4, + "Fujifilm FinePix F60fd": 8.0, + "Fujifilm FinePix F610": 7.53, + "Fujifilm FinePix F650 Zoom": 5.75, + "Fujifilm FinePix F660EXR": 6.4, + "Fujifilm FinePix F700": 7.53, + "Fujifilm FinePix F70EXR": 6.4, + "Fujifilm FinePix F710": 7.53, + "Fujifilm FinePix F72EXR": 6.4, + "Fujifilm FinePix F750EXR": 6.4, + "Fujifilm FinePix F75EXR": 6.4, + "Fujifilm FinePix F770EXR": 6.4, + "Fujifilm FinePix F800EXR": 6.4, + "Fujifilm FinePix F80EXR": 6.4, + "Fujifilm FinePix F810 Zoom": 7.53, + "Fujifilm FinePix F850EXR": 6.4, + "Fujifilm FinePix F85EXR": 6.4, + "Fujifilm FinePix F900EXR": 6.4, + "Fujifilm FinePix HS10": 6.16, + "Fujifilm FinePix HS11": 6.16, + "Fujifilm FinePix HS20 EXR": 6.4, + "Fujifilm FinePix HS22 EXR": 6.4, + "Fujifilm FinePix HS25 EXR": 6.4, + "Fujifilm FinePix HS30 EXR": 6.4, + "Fujifilm FinePix HS35 EXR": 6.4, + "Fujifilm FinePix HS50 EXR": 6.4, + 
"Fujifilm FinePix IS Pro": 23.0, + "Fujifilm FinePix IS-1": 8.0, + "Fujifilm FinePix J10": 5.75, + "Fujifilm FinePix J100": 6.16, + "Fujifilm FinePix J110w": 6.16, + "Fujifilm FinePix J12": 5.75, + "Fujifilm FinePix J120": 6.16, + "Fujifilm FinePix J15": 5.75, + "Fujifilm FinePix J150w": 6.16, + "Fujifilm FinePix J20": 6.16, + "Fujifilm FinePix J210": 6.16, + "Fujifilm FinePix J22": 6.16, + "Fujifilm FinePix J25": 6.16, + "Fujifilm FinePix J250": 6.16, + "Fujifilm FinePix J26": 6.16, + "Fujifilm FinePix J27": 6.16, + "Fujifilm FinePix J28": 6.16, + "Fujifilm FinePix J29": 6.16, + "Fujifilm FinePix J30": 6.16, + "Fujifilm FinePix J32": 6.16, + "Fujifilm FinePix J35": 6.16, + "Fujifilm FinePix J37": 6.16, + "Fujifilm FinePix J38": 6.16, + "Fujifilm FinePix J50": 5.75, + "Fujifilm FinePix JV100": 6.16, + "Fujifilm FinePix JV105": 6.16, + "Fujifilm FinePix JV110": 6.16, + "Fujifilm FinePix JV150": 6.16, + "Fujifilm FinePix JV200": 6.16, + "Fujifilm FinePix JV205": 6.16, + "Fujifilm FinePix JV250": 6.16, + "Fujifilm FinePix JV255": 6.16, + "Fujifilm FinePix JX200": 6.16, + "Fujifilm FinePix JX205": 6.16, + "Fujifilm FinePix JX210": 6.16, + "Fujifilm FinePix JX250": 6.16, + "Fujifilm FinePix JX280": 6.16, + "Fujifilm FinePix JX300": 6.16, + "Fujifilm FinePix JX305": 6.16, + "Fujifilm FinePix JX350": 6.16, + "Fujifilm FinePix JX355": 6.16, + "Fujifilm FinePix JX370": 6.16, + "Fujifilm FinePix JX375": 6.16, + "Fujifilm FinePix JX400": 6.16, + "Fujifilm FinePix JX405": 6.16, + "Fujifilm FinePix JX420": 6.16, + "Fujifilm FinePix JX500": 6.16, + "Fujifilm FinePix JX520": 6.16, + "Fujifilm FinePix JX530": 6.16, + "Fujifilm FinePix JX550": 6.16, + "Fujifilm FinePix JX580": 6.16, + "Fujifilm FinePix JX700": 6.16, + "Fujifilm FinePix JZ100": 6.16, + "Fujifilm FinePix JZ200": 6.16, + "Fujifilm FinePix JZ250": 6.16, + "Fujifilm FinePix JZ300": 6.16, + "Fujifilm FinePix JZ305": 6.16, + "Fujifilm FinePix JZ310": 6.16, + "Fujifilm FinePix JZ500": 6.16, + "Fujifilm FinePix JZ505": 6.16, + "Fujifilm FinePix JZ510": 6.16, + "Fujifilm FinePix JZ700": 6.16, + "Fujifilm FinePix M603": 7.53, + "Fujifilm FinePix PR21": 6.4, + "Fujifilm FinePix Real 3D W1": 6.16, + "Fujifilm FinePix Real 3D W3": 6.16, + "Fujifilm FinePix S1": 6.16, + "Fujifilm FinePix S1 Pro": 23.0, + "Fujifilm FinePix S1000fd": 6.16, + "Fujifilm FinePix S100fs": 8.8, + "Fujifilm FinePix S1500": 6.16, + "Fujifilm FinePix S1600": 6.16, + "Fujifilm FinePix S1700": 6.16, + "Fujifilm FinePix S1730": 6.16, + "Fujifilm FinePix S1770": 6.16, + "Fujifilm FinePix S1800": 6.16, + "Fujifilm FinePix S1850": 6.16, + "Fujifilm FinePix S1880": 6.16, + "Fujifilm FinePix S1900": 6.16, + "Fujifilm FinePix S2 Pro": 23.0, + "Fujifilm FinePix S20 Pro": 7.53, + "Fujifilm FinePix S2000hd": 6.16, + "Fujifilm FinePix S200EXR": 8.0, + "Fujifilm FinePix S205EXR": 8.0, + "Fujifilm FinePix S2500hd": 6.16, + "Fujifilm FinePix S2550hd": 6.16, + "Fujifilm FinePix S2600hd": 6.16, + "Fujifilm FinePix S2800hd": 6.16, + "Fujifilm FinePix S2900hd": 6.16, + "Fujifilm FinePix S2950": 6.16, + "Fujifilm FinePix S2980": 6.16, + "Fujifilm FinePix S2990": 6.16, + "Fujifilm FinePix S3 Pro": 23.5, + "Fujifilm FinePix S3000 Z": 5.33, + "Fujifilm FinePix S304": 5.33, + "Fujifilm FinePix S3200": 6.16, + "Fujifilm FinePix S3250": 6.16, + "Fujifilm FinePix S3300": 6.16, + "Fujifilm FinePix S3350": 6.16, + "Fujifilm FinePix S3400": 6.16, + "Fujifilm FinePix S3450": 6.16, + "Fujifilm FinePix S3500 Zoom": 5.33, + "Fujifilm FinePix S4000": 6.16, + "Fujifilm FinePix S4050": 6.16, + "Fujifilm FinePix 
S4200": 6.16, + "Fujifilm FinePix S4300": 6.16, + "Fujifilm FinePix S4400": 6.16, + "Fujifilm FinePix S4500": 6.16, + "Fujifilm FinePix S4600": 6.16, + "Fujifilm FinePix S4700": 6.16, + "Fujifilm FinePix S4800": 6.16, + "Fujifilm FinePix S5 Pro": 23.0, + "Fujifilm FinePix S5000 Zoom": 5.33, + "Fujifilm FinePix S5100 Zoom": 5.33, + "Fujifilm FinePix S5200 Zoom": 5.75, + "Fujifilm FinePix S5500 Zoom": 5.33, + "Fujifilm FinePix S5600 Zoom": 5.75, + "Fujifilm FinePix S5700 Zoom": 5.75, + "Fujifilm FinePix S5800": 5.75, + "Fujifilm FinePix S6000fd": 7.53, + "Fujifilm FinePix S602 Zoom": 7.53, + "Fujifilm FinePix S602Z Pro": 7.53, + "Fujifilm FinePix S6500fd": 7.53, + "Fujifilm FinePix S6600": 6.16, + "Fujifilm FinePix S6700": 6.16, + "Fujifilm FinePix S6800": 6.16, + "Fujifilm FinePix S7000 Zoom": 7.53, + "Fujifilm FinePix S8000fd": 6.03, + "Fujifilm FinePix S8100fd": 6.16, + "Fujifilm FinePix S8200": 6.16, + "Fujifilm FinePix S8300": 6.16, + "Fujifilm FinePix S8400": 6.16, + "Fujifilm FinePix S8500": 6.16, + "Fujifilm FinePix S8600": 6.16, + "Fujifilm FinePix S9000 Zoom": 8.0, + "Fujifilm FinePix S9100": 8.0, + "Fujifilm FinePix S9200": 6.16, + "Fujifilm FinePix S9400W": 6.16, + "Fujifilm FinePix S9500": 8.0, + "Fujifilm FinePix S9600": 8.0, + "Fujifilm FinePix SL1000": 6.16, + "Fujifilm FinePix SL240": 6.16, + "Fujifilm FinePix SL260": 6.16, + "Fujifilm FinePix SL280": 6.16, + "Fujifilm FinePix SL300": 6.16, + "Fujifilm FinePix T200": 6.16, + "Fujifilm FinePix T205": 6.16, + "Fujifilm FinePix T300": 6.16, + "Fujifilm FinePix T305": 6.16, + "Fujifilm FinePix T350": 6.16, + "Fujifilm FinePix T400": 6.16, + "Fujifilm FinePix T500": 6.16, + "Fujifilm FinePix T550": 6.16, + "Fujifilm FinePix V10 Zoom": 5.75, + "Fujifilm FinePix X100": 23.6, + "Fujifilm FinePix XP10": 6.16, + "Fujifilm FinePix XP100": 6.16, + "Fujifilm FinePix XP11": 6.16, + "Fujifilm FinePix XP150": 6.16, + "Fujifilm FinePix XP170": 6.16, + "Fujifilm FinePix XP20": 6.16, + "Fujifilm FinePix XP200": 6.16, + "Fujifilm FinePix XP22": 6.16, + "Fujifilm FinePix XP30": 6.16, + "Fujifilm FinePix XP33": 6.16, + "Fujifilm FinePix XP50": 6.16, + "Fujifilm FinePix XP60": 6.16, + "Fujifilm FinePix XP70": 6.16, + "Fujifilm FinePix Z1": 5.75, + "Fujifilm FinePix Z1000EXR": 6.4, + "Fujifilm FinePix Z100fd": 5.75, + "Fujifilm FinePix Z10fd": 5.75, + "Fujifilm FinePix Z110": 6.16, + "Fujifilm FinePix Z2": 5.75, + "Fujifilm FinePix Z200fd": 6.16, + "Fujifilm FinePix Z20fd": 6.16, + "Fujifilm FinePix Z3": 5.75, + "Fujifilm FinePix Z30": 6.16, + "Fujifilm FinePix Z300": 6.16, + "Fujifilm FinePix Z31": 6.16, + "Fujifilm FinePix Z33WP": 6.16, + "Fujifilm FinePix Z35": 6.16, + "Fujifilm FinePix Z37": 6.16, + "Fujifilm FinePix Z5fd": 5.75, + "Fujifilm FinePix Z70": 6.16, + "Fujifilm FinePix Z700EXR": 6.4, + "Fujifilm FinePix Z707EXR": 6.4, + "Fujifilm FinePix Z71": 6.16, + "Fujifilm FinePix Z80": 6.16, + "Fujifilm FinePix Z800EXR": 6.4, + "Fujifilm FinePix Z808EXR": 6.4, + "Fujifilm FinePix Z81": 6.16, + "Fujifilm FinePix Z90": 6.16, + "Fujifilm FinePix Z900EXR": 6.4, + "Fujifilm FinePix Z909EXR": 6.4, + "Fujifilm FinePix Z91": 6.16, + "Fujifilm FinePix Z950EXR": 6.4, + "Fujifilm Finepix 30i": 5.33, + "Fujifilm MX-1200": 6.4, + "Fujifilm MX-1400": 5.33, + "Fujifilm MX-1500": 6.4, + "Fujifilm MX-1700": 6.4, + "Fujifilm MX-2700": 6.4, + "Fujifilm MX-2900 Zoom": 6.4, + "Fujifilm MX-500": 6.4, + "Fujifilm MX-600 Zoom": 6.4, + "Fujifilm MX-700": 6.4, + "Fujifilm X-A1": 23.6, + "Fujifilm X-E1": 23.6, + "Fujifilm X-E2": 23.6, + "Fujifilm X-M1": 23.6, + 
"Fujifilm X-Pro1": 23.6, + "Fujifilm X-S1": 8.8, + "Fujifilm X-T1": 23.6, + "Fujifilm X10": 8.8, + "Fujifilm X100S": 23.6, + "Fujifilm X100T": 23.6, + "Fujifilm X20": 8.8, + "Fujifilm X30": 8.8, + "Fujifilm XF1": 8.8, + "Fujifilm XQ1": 8.8, + "GE A1030": 7.53, + "GE A1035": 6.08, + "GE A1050": 6.08, + "GE A1235": 6.16, + "GE A1250": 6.16, + "GE A1255": 6.08, + "GE A1455": 6.16, + "GE A1456W": 6.16, + "GE A730": 5.75, + "GE A735": 5.75, + "GE A830": 5.75, + "GE A835": 5.75, + "GE A950": 6.08, + "GE C1033": 6.08, + "GE C1233": 6.08, + "GE C1433": 6.16, + "GE C1440W": 6.16, + "GE Create": 6.16, + "GE E1030": 7.53, + "GE E1035": 7.53, + "GE E1040": 7.53, + "GE E1050": 6.08, + "GE E1050 TW": 6.08, + "GE E1055 W": 6.08, + "GE E1235": 7.44, + "GE E1240": 7.53, + "GE E1250TW": 6.16, + "GE E1255W": 6.16, + "GE E1276W": 6.16, + "GE E1410SW": 6.16, + "GE E1450W": 6.16, + "GE E1480W": 6.16, + "GE E1486TW": 6.16, + "GE E1680W": 6.08, + "GE E840S": 5.75, + "GE E850": 5.75, + "GE G 1": 5.75, + "GE G100": 6.16, + "GE G2": 6.03, + "GE G3": 6.08, + "GE G3WP": 6.16, + "GE G5WP": 6.16, + "GE J1050": 6.08, + "GE J1250": 6.16, + "GE J1455": 6.16, + "GE J1456W": 6.16, + "GE J1458W": 6.16, + "GE J1470S": 6.08, + "GE PJ1": 6.16, + "GE X1": 5.75, + "GE X3": 6.08, + "GE X500": 6.16, + "GE X550": 6.16, + "GE X600": 6.16, + "Hasselblad L2D-20c": 17.5, + "HP CA350": 6.16, + "HP CB350": 6.16, + "HP CW450": 6.16, + "HP CW450t": 6.16, + "HP PW460t": 6.16, + "HP PW550": 6.16, + "HP Photosmart 120": 6.4, + "HP Photosmart 318": 5.33, + "HP Photosmart 320": 5.33, + "HP Photosmart 435": 5.33, + "HP Photosmart 612": 5.33, + "HP Photosmart 620": 4.8, + "HP Photosmart 635": 4.5, + "HP Photosmart 715": 7.11, + "HP Photosmart 720": 7.11, + "HP Photosmart 733": 5.33, + "HP Photosmart 735": 5.33, + "HP Photosmart 812": 7.11, + "HP Photosmart 850": 7.11, + "HP Photosmart 935": 7.11, + "HP Photosmart 945": 7.11, + "HP Photosmart C20": 8.8, + "HP Photosmart C200": 8.8, + "HP Photosmart C215": 5.33, + "HP Photosmart C30": 8.8, + "HP Photosmart C315": 5.33, + "HP Photosmart C500": 8.8, + "HP Photosmart C618": 5.33, + "HP Photosmart C912": 8.8, + "HP Photosmart E317": 5.75, + "HP Photosmart E327": 5.75, + "HP Photosmart E337": 5.75, + "HP Photosmart E427": 5.75, + "HP Photosmart M22": 5.75, + "HP Photosmart M23": 5.75, + "HP Photosmart M307": 5.33, + "HP Photosmart M407": 5.75, + "HP Photosmart M417": 5.75, + "HP Photosmart M425": 5.75, + "HP Photosmart M437": 5.75, + "HP Photosmart M447": 5.75, + "HP Photosmart M517": 5.75, + "HP Photosmart M525": 5.75, + "HP Photosmart M527": 5.75, + "HP Photosmart M537": 5.75, + "HP Photosmart M547": 5.75, + "HP Photosmart M627": 5.75, + "HP Photosmart M637": 5.75, + "HP Photosmart M737": 5.75, + "HP Photosmart Mz67": 7.11, + "HP Photosmart R507": 5.75, + "HP Photosmart R607": 5.75, + "HP Photosmart R707": 7.11, + "HP Photosmart R717": 7.11, + "HP Photosmart R725": 5.75, + "HP Photosmart R727": 5.75, + "HP Photosmart R742": 5.75, + "HP Photosmart R817": 5.75, + "HP Photosmart R818": 5.75, + "HP Photosmart R827": 5.75, + "HP Photosmart R837": 5.75, + "HP Photosmart R847": 5.75, + "HP Photosmart R927": 7.11, + "HP Photosmart R937": 5.75, + "HP Photosmart R967": 7.11, + "HP R607 BMW": 5.75, + "HP R607 Harajuku": 5.75, + "HP SB360": 6.16, + "HP SW450": 6.16, + "HTC One X": 4.8, + "HTC One": 4.54, + "JVC GC-QX3HD": 7.11, + "JVC GC-QX5HD": 7.11, + "Jenoptik JD 1300 D": 6.4, + "Jenoptik JD 1300 F": 6.4, + "Jenoptik JD 1500 z3": 6.4, + "Jenoptik JD 2.1 FF": 4.5, + "Jenoptik JD 2.1 xz3": 4.5, + "Jenoptik JD 2100 
AF": 5.33, + "Jenoptik JD 2100 F": 5.33, + "Jenoptik JD 2100 M": 5.33, + "Jenoptik JD 2100 z3 S": 5.33, + "Jenoptik JD 2300 z3": 7.53, + "Jenoptik JD 3.1 exclusiv": 6.4, + "Jenoptik JD 3.1 z3 MPEG 4": 5.33, + "Jenoptik JD 3.3 AF": 5.33, + "Jenoptik JD 3.3 xz3": 5.33, + "Jenoptik JD 3.3x4 ie": 5.33, + "Jenoptik JD 3.3z10": 5.33, + "Jenoptik JD 3300 z3": 7.11, + "Jenoptik JD 3300 z3 S": 7.11, + "Jenoptik JD 4.0 LCD": 7.11, + "Jenoptik JD 4.1 xz3": 5.75, + "Jenoptik JD 4.1 z3 MPEG4": 5.75, + "Jenoptik JD 4.1 z8": 5.75, + "Jenoptik JD 4.1 zoom": 5.75, + "Jenoptik JD 4100 z3": 7.53, + "Jenoptik JD 4100 z3 S": 7.11, + "Jenoptik JD 4100 zoom": 7.11, + "Jenoptik JD 4360z": 7.11, + "Jenoptik JD 4363z": 7.11, + "Jenoptik JD 5.0z3 EasyShot": 5.75, + "Jenoptik JD 5.2 z3": 7.11, + "Jenoptik JD 5.2 z3 MPEG4": 7.11, + "Jenoptik JD 5.2 zoom": 5.75, + "Jenoptik JD 5200 z3": 7.11, + "Jenoptik JD 6.0 z3": 7.11, + "Jenoptik JD 6.0 z3 MPEG4": 7.11, + "Jenoptik JD 6.0 z3 exclusiv": 7.11, + "Jenoptik JD 8.0 exclusiv": 7.11, + "Jenoptik JD 8.0z3 EasyShot": 7.11, + "Jenoptik JD C 1.3 LCD": 6.4, + "Jenoptik JD C 1.3 SD": 6.4, + "Jenoptik JD C 1300": 6.4, + "Jenoptik JD C 2.1 LCD": 6.4, + "Jenoptik JD C 3.0 S": 6.4, + "Jenoptik JD C 3.1 LCD": 6.4, + "Jenoptik JD C 3.1 LI": 6.4, + "Jenoptik JD C 3.1 SL": 6.4, + "Jenoptik JD C 3.1 z3": 6.4, + "Jenoptik JD C 5.0 SL": 7.11, + "Kodak DC200": 7.27, + "Kodak DC200 plus": 7.27, + "Kodak DC210 plus": 7.27, + "Kodak DC215": 7.27, + "Kodak DC220": 7.27, + "Kodak DC240": 7.27, + "Kodak DC260": 7.27, + "Kodak DC265": 7.27, + "Kodak DC280": 7.53, + "Kodak DC290": 7.27, + "Kodak DC3200": 7.53, + "Kodak DC3400": 7.53, + "Kodak DC3800": 7.53, + "Kodak DC4800": 7.27, + "Kodak DC5000": 7.27, + "Kodak DCS Pro 14n": 36.0, + "Kodak DCS Pro SLR/c": 36.0, + "Kodak DCS Pro SLR/n": 36.0, + "Kodak DCS315": 27.65, + "Kodak DCS330": 18.1, + "Kodak DCS420": 14.0, + "Kodak DCS460": 27.65, + "Kodak DCS520": 27.65, + "Kodak DCS560": 27.65, + "Kodak DCS620": 27.65, + "Kodak DCS620x": 22.8, + "Kodak DCS660": 27.65, + "Kodak DCS720x": 22.8, + "Kodak DCS760": 27.65, + "Kodak DX3215": 5.33, + "Kodak DX3500": 6.4, + "Kodak DX3600": 6.4, + "Kodak DX3700": 7.11, + "Kodak DX3900": 7.11, + "Kodak DX4330": 5.75, + "Kodak DX4530": 5.75, + "Kodak DX4900": 5.75, + "Kodak DX6340": 5.33, + "Kodak DX6440": 5.75, + "Kodak DX6490": 5.75, + "Kodak DX7440": 5.75, + "Kodak DX7590": 5.75, + "Kodak DX7630": 7.11, + "Kodak EasyShare C1013": 6.16, + "Kodak EasyShare C135": 6.16, + "Kodak EasyShare C140": 5.75, + "Kodak EasyShare C142": 5.75, + "Kodak EasyShare C143": 6.16, + "Kodak EasyShare C1505": 6.16, + "Kodak EasyShare C1530": 6.16, + "Kodak EasyShare C1550": 6.16, + "Kodak EasyShare C160": 5.75, + "Kodak EasyShare C180": 6.16, + "Kodak EasyShare C182": 6.16, + "Kodak EasyShare C183": 6.16, + "Kodak EasyShare C190": 6.16, + "Kodak EasyShare C195": 6.16, + "Kodak EasyShare C300": 7.11, + "Kodak EasyShare C310": 7.11, + "Kodak EasyShare C330": 7.11, + "Kodak EasyShare C340": 7.11, + "Kodak EasyShare C360": 7.11, + "Kodak EasyShare C433": 5.75, + "Kodak EasyShare C503": 5.75, + "Kodak EasyShare C513": 5.75, + "Kodak EasyShare C530": 5.75, + "Kodak EasyShare C533": 5.75, + "Kodak EasyShare C610": 5.75, + "Kodak EasyShare C613": 5.75, + "Kodak EasyShare C623": 5.75, + "Kodak EasyShare C643": 5.75, + "Kodak EasyShare C653": 5.75, + "Kodak EasyShare C663": 5.75, + "Kodak EasyShare C703": 5.75, + "Kodak EasyShare C713": 5.75, + "Kodak EasyShare C743": 5.75, + "Kodak EasyShare C763": 5.75, + "Kodak EasyShare C813": 5.75, + 
"Kodak EasyShare C875": 7.11, + "Kodak EasyShare C913": 5.75, + "Kodak EasyShare CD1013": 6.16, + "Kodak EasyShare CD703": 5.75, + "Kodak EasyShare CD80": 6.16, + "Kodak EasyShare CD82": 6.08, + "Kodak EasyShare CD90": 6.08, + "Kodak EasyShare CD93": 5.75, + "Kodak EasyShare CX4200": 5.33, + "Kodak EasyShare CX4230": 5.33, + "Kodak EasyShare CX4300": 5.33, + "Kodak EasyShare CX6200": 5.75, + "Kodak EasyShare CX6230": 5.75, + "Kodak EasyShare CX6330": 5.75, + "Kodak EasyShare CX6445": 5.75, + "Kodak EasyShare CX7220": 5.75, + "Kodak EasyShare CX7300": 5.33, + "Kodak EasyShare CX7330": 5.75, + "Kodak EasyShare CX7430": 5.75, + "Kodak EasyShare CX7525": 5.75, + "Kodak EasyShare CX7530": 5.75, + "Kodak EasyShare LS745": 7.11, + "Kodak EasyShare M1033": 6.16, + "Kodak EasyShare M1063": 5.75, + "Kodak EasyShare M1073 IS": 5.75, + "Kodak EasyShare M1093 IS": 6.16, + "Kodak EasyShare M215": 4.8, + "Kodak EasyShare M320": 5.75, + "Kodak EasyShare M340": 6.16, + "Kodak EasyShare M341": 6.16, + "Kodak EasyShare M380": 6.16, + "Kodak EasyShare M381": 6.16, + "Kodak EasyShare M420": 6.16, + "Kodak EasyShare M522": 6.16, + "Kodak EasyShare M530": 6.16, + "Kodak EasyShare M531": 6.16, + "Kodak EasyShare M532": 6.16, + "Kodak EasyShare M5370": 6.16, + "Kodak EasyShare M550": 6.16, + "Kodak EasyShare M552": 6.16, + "Kodak EasyShare M565": 6.16, + "Kodak EasyShare M575": 6.16, + "Kodak EasyShare M580": 6.16, + "Kodak EasyShare M583": 6.08, + "Kodak EasyShare M750": 6.16, + "Kodak EasyShare M753": 5.75, + "Kodak EasyShare M763": 5.75, + "Kodak EasyShare M853": 5.75, + "Kodak EasyShare M863": 5.75, + "Kodak EasyShare M873": 5.75, + "Kodak EasyShare M883": 5.75, + "Kodak EasyShare M893 IS": 5.75, + "Kodak EasyShare MD1063": 6.08, + "Kodak EasyShare MD30": 6.08, + "Kodak EasyShare MD41": 6.08, + "Kodak EasyShare MD81": 6.08, + "Kodak EasyShare MD853": 5.75, + "Kodak EasyShare MD863": 5.75, + "Kodak EasyShare MX1063": 6.08, + "Kodak EasyShare Max Z990": 6.08, + "Kodak EasyShare Mini": 4.8, + "Kodak EasyShare P712": 5.75, + "Kodak EasyShare P850": 5.75, + "Kodak EasyShare P880": 7.11, + "Kodak EasyShare Sport": 6.16, + "Kodak EasyShare Touch M577": 6.16, + "Kodak EasyShare V1003": 7.11, + "Kodak EasyShare V1073": 7.85, + "Kodak EasyShare V1233": 7.44, + "Kodak EasyShare V1253": 7.44, + "Kodak EasyShare V1273": 7.44, + "Kodak EasyShare V530": 5.75, + "Kodak EasyShare V550": 5.75, + "Kodak EasyShare V570": 5.75, + "Kodak EasyShare V603": 5.75, + "Kodak EasyShare V610": 5.75, + "Kodak EasyShare V705": 5.75, + "Kodak EasyShare V803": 7.11, + "Kodak EasyShare Z1012 IS": 6.08, + "Kodak EasyShare Z1015 IS": 6.08, + "Kodak EasyShare Z1085 IS": 7.85, + "Kodak EasyShare Z1275": 7.44, + "Kodak EasyShare Z1285": 7.44, + "Kodak EasyShare Z1485 IS": 7.44, + "Kodak EasyShare Z5010": 6.16, + "Kodak EasyShare Z5120": 6.08, + "Kodak EasyShare Z612": 5.75, + "Kodak EasyShare Z650": 5.75, + "Kodak EasyShare Z700": 5.75, + "Kodak EasyShare Z710": 5.75, + "Kodak EasyShare Z712 IS": 5.75, + "Kodak EasyShare Z730": 5.75, + "Kodak EasyShare Z740": 5.75, + "Kodak EasyShare Z7590": 5.75, + "Kodak EasyShare Z760": 7.11, + "Kodak EasyShare Z812 IS": 5.75, + "Kodak EasyShare Z8612 IS": 5.75, + "Kodak EasyShare Z885": 7.11, + "Kodak EasyShare Z915": 6.16, + "Kodak EasyShare Z950": 6.08, + "Kodak EasyShare Z980": 6.08, + "Kodak EasyShare Z981": 6.08, + "Kodak EasyShare Z990": 6.08, + "Kodak EasyShare ZD15": 6.16, + "Kodak EasyShare ZD710": 5.75, + "Kodak EasyShare ZD8612 IS": 5.75, + "Kodak EasyShare-One 6MP": 5.75, + "Kodak Easyshare One": 
5.75, + "Kodak LS420": 7.11, + "Kodak LS443": 5.33, + "Kodak LS633": 5.75, + "Kodak LS743": 7.11, + "Kodak LS753": 7.11, + "Kodak LS755": 5.75, + "Kodak M590": 4.8, + "Kodak PixPro AZ251": 6.16, + "Kodak PixPro AZ361": 6.16, + "Kodak PixPro AZ362": 6.16, + "Kodak PixPro AZ501": 6.16, + "Kodak PixPro AZ521": 6.16, + "Kodak PixPro AZ522": 6.16, + "Kodak PixPro AZ651": 6.16, + "Kodak PixPro FZ151": 6.16, + "Kodak PixPro FZ201": 6.16, + "Kodak PixPro FZ41": 6.16, + "Kodak PixPro FZ51": 6.16, + "Kodak S-1": 17.3, + "Kodak Slice": 6.16, + "Kodak mc3": 6.4, + "Konica DG-2": 5.33, + "Konica DG-3Z": 5.33, + "Konica Q-M100": 4.8, + "Konica Q-M200": 6.4, + "Konica Revio C2": 4.23, + "Konica Revio KD-200Z": 5.33, + "Konica Revio KD-210Z": 7.11, + "Konica Revio KD-220Z": 4.5, + "Konica Revio KD-25": 7.11, + "Konica Revio KD-300Z": 7.11, + "Konica Revio KD-310Z": 7.11, + "Konica Revio KD-3300Z": 5.75, + "Konica Revio KD-4000Z": 7.11, + "Konica Revio KD-400Z": 7.11, + "Konica Revio KD-410Z": 7.11, + "Konica Revio KD-420Z": 5.75, + "Konica Revio KD-500Z": 7.11, + "Konica Revio KD-510Z": 7.11, + "Konica-Minolta DG-5W": 5.75, + "Konica-Minolta DiMAGE A2": 8.8, + "Konica-Minolta DiMAGE A200": 8.8, + "Konica-Minolta DiMAGE E40": 6.4, + "Konica-Minolta DiMAGE E50": 5.75, + "Konica-Minolta DiMAGE E500": 5.75, + "Konica-Minolta DiMAGE G530": 5.75, + "Konica-Minolta DiMAGE G600": 7.27, + "Konica-Minolta DiMAGE X1": 7.11, + "Konica-Minolta DiMAGE X31": 4.5, + "Konica-Minolta DiMAGE X50": 5.75, + "Konica-Minolta DiMAGE X60": 5.75, + "Konica-Minolta DiMAGE Xg": 5.33, + "Konica-Minolta DiMAGE Z10": 5.75, + "Konica-Minolta DiMAGE Z2": 5.75, + "Konica-Minolta DiMAGE Z20": 5.75, + "Konica-Minolta DiMAGE Z3": 5.75, + "Konica-Minolta DiMAGE Z5": 5.75, + "Konica-Minolta DiMAGE Z6": 5.75, + "Konica-Minolta Dynax 5D": 23.5, + "Konica-Minolta Dynax 7D": 23.5, + "Konica-Minolta e-mini": 8.8, + "Konica-Minolta e-mini D": 8.8, + "Konica-Minolta e-mini M": 8.8, + "Kyocera Finecam 3300": 7.11, + "Kyocera Finecam L3": 5.33, + "Kyocera Finecam L30": 5.33, + "Kyocera Finecam L3v": 5.33, + "Kyocera Finecam L4": 5.75, + "Kyocera Finecam L4v": 7.11, + "Kyocera Finecam M400R": 5.33, + "Kyocera Finecam M410R": 5.33, + "Kyocera Finecam S3": 7.11, + "Kyocera Finecam S3L": 7.11, + "Kyocera Finecam S3R": 7.11, + "Kyocera Finecam S3X": 7.11, + "Kyocera Finecam S4": 7.11, + "Kyocera Finecam S5": 7.11, + "Kyocera Finecam S5R": 7.11, + "Kyocera Finecam SL300R": 5.33, + "Kyocera Finecam SL400R": 5.33, + "LG G3": 4.69, + "LGE Nexus 5": 4.54, + "Leica X1": 23.6, + "Leica C (Typ112)": 7.53, + "Leica C-LUX 1": 5.75, + "Leica C-LUX 2": 5.75, + "Leica C-LUX 3": 6.08, + "Leica D-LUX": 5.75, + "Leica D-LUX 2": 7.76, + "Leica D-LUX 3": 7.76, + "Leica D-LUX 4": 7.85, + "Leica D-LUX 5": 7.85, + "Leica D-Lux 6": 7.53, + "Leica Digilux": 6.4, + "Leica Digilux 1": 7.53, + "Leica Digilux 2": 8.8, + "Leica Digilux 3": 17.3, + "Leica Digilux 4.3": 7.53, + "Leica Digilux Zoom": 6.4, + "Leica M Typ 240": 36.0, + "Leica M-E Typ 220": 35.8, + "Leica M-Monochrom": 35.8, + "Leica M-P": 36.0, + "Leica M8": 27.0, + "Leica M8.2": 27.0, + "Leica M9": 35.8, + "Leica M9 Titanium": 35.8, + "Leica M9-P": 35.8, + "Leica S (Type 007)": 45.0, + "Leica S-E": 45.0, + "Leica S2": 45.0, + "Leica T (Typ 701)": 23.6, + "Leica V-LUX 1": 7.11, + "Leica V-LUX 2": 6.08, + "Leica V-LUX 20": 6.08, + "Leica V-LUX 3": 6.08, + "Leica V-LUX 30": 6.08, + "Leica V-Lux 4": 6.16, + "Leica V-Lux 40": 6.08, + "Leica X (Typ 113)": 23.6, + "Leica X Vario": 23.6, + "Leica X-E": 23.6, + "Leica X2": 23.6, + 
"Minolta DiMAGE 2300": 7.53, + "Minolta DiMAGE 2330": 7.53, + "Minolta DiMAGE 5": 7.11, + "Minolta DiMAGE 7": 8.8, + "Minolta DiMAGE 7Hi": 8.8, + "Minolta DiMAGE 7i": 8.8, + "Minolta DiMAGE A1": 8.8, + "Minolta DiMAGE E201": 7.53, + "Minolta DiMAGE E203": 5.33, + "Minolta DiMAGE E223": 5.33, + "Minolta DiMAGE E323": 5.33, + "Minolta DiMAGE EX 1500 Wide": 6.4, + "Minolta DiMAGE EX 1500 Zoom": 6.4, + "Minolta DiMAGE F100": 7.11, + "Minolta DiMAGE F200": 7.11, + "Minolta DiMAGE F300": 7.11, + "Minolta DiMAGE G400": 5.75, + "Minolta DiMAGE G500": 7.11, + "Minolta DiMAGE S304": 7.11, + "Minolta DiMAGE S404": 7.11, + "Minolta DiMAGE S414": 7.11, + "Minolta DiMAGE X": 5.33, + "Minolta DiMAGE X20": 4.5, + "Minolta DiMAGE Xi": 5.33, + "Minolta DiMAGE Xt": 5.33, + "Minolta DiMAGE Z1": 5.33, + "Minolta RD-3000": 6.4, + "Minox Classic Leica M3 2.1": 6.4, + "Minox Classic Leica M3 3MP": 6.4, + "Minox Classic Leica M3 4MP": 6.4, + "Minox Classic Leica M3 5MP": 6.4, + "Minox DC 1011": 7.53, + "Minox DC 1011 Carat": 7.53, + "Minox DC 1022": 7.53, + "Minox DC 1033": 5.75, + "Minox DC 1044": 6.08, + "Minox DC 1055": 6.08, + "Minox DC 1211": 6.08, + "Minox DC 1222": 6.16, + "Minox DC 1233": 6.08, + "Minox DC 1311": 5.33, + "Minox DC 1422": 6.08, + "Minox DC 2111": 5.33, + "Minox DC 2122": 5.33, + "Minox DC 2133": 4.5, + "Minox DC 3311": 7.11, + "Minox DC 4011": 7.11, + "Minox DC 4211": 5.75, + "Minox DC 5011": 5.75, + "Minox DC 5211": 7.11, + "Minox DC 5222": 5.75, + "Minox DC 6011": 5.75, + "Minox DC 6033 WP": 5.75, + "Minox DC 6211": 5.75, + "Minox DC 6311": 7.11, + "Minox DC 7011": 5.75, + "Minox DC 7022": 5.75, + "Minox DC 7411": 5.75, + "Minox DC 8011": 5.75, + "Minox DC 8022 WP": 5.75, + "Minox DC 8111": 7.11, + "Minox DC 8122": 7.11, + "Minox DC 9011 WP": 6.16, + "Minox DCC 14.0": 6.16, + "Minox DCC 5.0 White Edition": 6.16, + "Minox DCC 5.1": 6.16, + "Minox DCC Leica M3 5MP Gold": 6.16, + "Minox DCC Rolleiflex AF 5.0": 6.4, + "Minox DD1": 6.4, + "Minox DD1 Diamond": 6.4, + "Minox DD100": 6.4, + "Minox DD200": 6.4, + "Minox DM 1": 6.4, + "Minox Mobi DV": 6.4, + "Minox Rolleiflex MiniDigi": 6.4, + "Nikon 1 AW1": 13.2, + "Nikon 1 J1": 13.2, + "Nikon 1 J2": 13.2, + "Nikon 1 J3": 13.2, + "Nikon 1 J4": 13.2, + "Nikon 1 S1": 13.2, + "Nikon 1 S2": 13.2, + "Nikon 1 V1": 13.2, + "Nikon 1 V2": 13.2, + "Nikon 1 V3": 13.2, + "Nikon Coolpix 100": 4.8, + "Nikon Coolpix 2000": 5.33, + "Nikon Coolpix 2100": 4.5, + "Nikon Coolpix 2200": 4.5, + "Nikon Coolpix 2500": 5.33, + "Nikon Coolpix 300": 4.8, + "Nikon Coolpix 3100": 5.33, + "Nikon Coolpix 3200": 5.33, + "Nikon Coolpix 3500": 5.33, + "Nikon Coolpix 3700": 5.33, + "Nikon Coolpix 4100": 5.75, + "Nikon Coolpix 4200": 7.11, + "Nikon Coolpix 4300": 7.11, + "Nikon Coolpix 4500": 7.11, + "Nikon Coolpix 4600": 5.75, + "Nikon Coolpix 4800": 5.75, + "Nikon Coolpix 5000": 8.8, + "Nikon Coolpix 5200": 7.11, + "Nikon Coolpix 5400": 7.11, + "Nikon Coolpix 5600": 5.75, + "Nikon Coolpix 5700": 8.8, + "Nikon Coolpix 5900": 7.11, + "Nikon Coolpix 600": 5.33, + "Nikon Coolpix 700": 6.4, + "Nikon Coolpix 7600": 7.11, + "Nikon Coolpix 775": 5.33, + "Nikon Coolpix 7900": 7.11, + "Nikon Coolpix 800": 6.4, + "Nikon Coolpix 8400": 8.8, + "Nikon Coolpix 8700": 8.8, + "Nikon Coolpix 880": 7.11, + "Nikon Coolpix 8800": 8.8, + "Nikon Coolpix 885": 7.11, + "Nikon Coolpix 900": 5.33, + "Nikon Coolpix 900s": 5.33, + "Nikon Coolpix 910": 6.4, + "Nikon Coolpix 950": 6.4, + "Nikon Coolpix 990": 7.11, + "Nikon Coolpix 995": 7.11, + "Nikon Coolpix A": 23.6, + "Nikon Coolpix AW100": 6.16, + "Nikon 
Coolpix AW100s": 6.16, + "Nikon Coolpix AW110": 6.16, + "Nikon Coolpix AW120": 6.16, + "Nikon Coolpix L1": 5.75, + "Nikon Coolpix L10": 5.75, + "Nikon Coolpix L100": 6.08, + "Nikon Coolpix L101": 5.75, + "Nikon Coolpix L11": 5.75, + "Nikon Coolpix L110": 6.16, + "Nikon Coolpix L12": 5.75, + "Nikon Coolpix L120": 6.16, + "Nikon Coolpix L14": 5.75, + "Nikon Coolpix L15": 5.75, + "Nikon Coolpix L16": 5.75, + "Nikon Coolpix L18": 5.75, + "Nikon Coolpix L19": 5.75, + "Nikon Coolpix L20": 6.08, + "Nikon Coolpix L21": 6.08, + "Nikon Coolpix L22": 6.16, + "Nikon Coolpix L23": 4.96, + "Nikon Coolpix L24": 6.16, + "Nikon Coolpix L25": 4.8, + "Nikon Coolpix L26": 6.16, + "Nikon Coolpix L27": 6.16, + "Nikon Coolpix L28": 6.16, + "Nikon Coolpix L29": 6.16, + "Nikon Coolpix L30": 6.16, + "Nikon Coolpix L310": 6.16, + "Nikon Coolpix L320": 6.16, + "Nikon Coolpix L330": 6.16, + "Nikon Coolpix L5": 5.75, + "Nikon Coolpix L6": 5.75, + "Nikon Coolpix L610": 6.16, + "Nikon Coolpix L620": 6.16, + "Nikon Coolpix L810": 6.16, + "Nikon Coolpix L820": 6.16, + "Nikon Coolpix L830": 6.16, + "Nikon Coolpix P1": 7.11, + "Nikon Coolpix P100": 6.16, + "Nikon Coolpix P2": 7.11, + "Nikon Coolpix P3": 7.11, + "Nikon Coolpix P300": 6.16, + "Nikon Coolpix P310": 6.16, + "Nikon Coolpix P330": 7.53, + "Nikon Coolpix P340": 7.53, + "Nikon Coolpix P4": 7.11, + "Nikon Coolpix P50": 5.75, + "Nikon Coolpix P500": 6.16, + "Nikon Coolpix P5000": 7.11, + "Nikon Coolpix P510": 6.16, + "Nikon Coolpix P5100": 7.44, + "Nikon Coolpix P520": 6.16, + "Nikon Coolpix P530": 6.16, + "Nikon Coolpix P60": 5.75, + "Nikon Coolpix P600": 6.16, + "Nikon Coolpix P6000": 7.44, + "Nikon Coolpix P7000": 7.53, + "Nikon Coolpix P7100": 7.53, + "Nikon Coolpix P7700": 7.53, + "Nikon Coolpix P7800": 7.53, + "Nikon Coolpix P80": 6.08, + "Nikon Coolpix P90": 6.08, + "Nikon Coolpix S01": 4.96, + "Nikon Coolpix S02": 4.8, + "Nikon Coolpix S1": 5.75, + "Nikon Coolpix S10": 5.75, + "Nikon Coolpix S100": 6.16, + "Nikon Coolpix S1000pj": 6.16, + "Nikon Coolpix S1100pj": 6.16, + "Nikon Coolpix S1200pj": 6.16, + "Nikon Coolpix S2": 5.75, + "Nikon Coolpix S200": 5.75, + "Nikon Coolpix S210": 5.75, + "Nikon Coolpix S220": 6.08, + "Nikon Coolpix S225": 6.08, + "Nikon Coolpix S230": 6.08, + "Nikon Coolpix S2500": 6.16, + "Nikon Coolpix S2600": 6.16, + "Nikon Coolpix S2700": 6.16, + "Nikon Coolpix S2750": 6.16, + "Nikon Coolpix S2800 ": 6.16, + "Nikon Coolpix S3": 5.75, + "Nikon Coolpix S30": 4.8, + "Nikon Coolpix S3000": 6.16, + "Nikon Coolpix S31": 4.96, + "Nikon Coolpix S3100": 6.16, + "Nikon Coolpix S32": 4.8, + "Nikon Coolpix S3200": 6.16, + "Nikon Coolpix S3300": 6.16, + "Nikon Coolpix S3400": 6.16, + "Nikon Coolpix S3500": 6.16, + "Nikon Coolpix S3600": 6.16, + "Nikon Coolpix S4": 5.75, + "Nikon Coolpix S4000": 6.16, + "Nikon Coolpix S4100": 6.16, + "Nikon Coolpix S4150": 6.16, + "Nikon Coolpix S4200": 6.16, + "Nikon Coolpix S4300": 6.16, + "Nikon Coolpix S4400": 6.16, + "Nikon Coolpix S5": 5.75, + "Nikon Coolpix S50": 5.75, + "Nikon Coolpix S500": 5.75, + "Nikon Coolpix S50c": 5.75, + "Nikon Coolpix S51": 5.75, + "Nikon Coolpix S510": 5.75, + "Nikon Coolpix S5100": 6.16, + "Nikon Coolpix S51c": 5.75, + "Nikon Coolpix S52": 5.75, + "Nikon Coolpix S520": 5.75, + "Nikon Coolpix S5200": 6.16, + "Nikon Coolpix S52c": 5.75, + "Nikon Coolpix S5300": 6.16, + "Nikon Coolpix S550": 6.16, + "Nikon Coolpix S560": 6.08, + "Nikon Coolpix S570": 6.16, + "Nikon Coolpix S6": 5.75, + "Nikon Coolpix S60": 6.16, + "Nikon Coolpix S600": 6.08, + "Nikon Coolpix S6000": 6.16, + "Nikon 
Coolpix S610": 6.08, + "Nikon Coolpix S6100": 6.16, + "Nikon Coolpix S610c": 6.08, + "Nikon Coolpix S6150": 6.16, + "Nikon Coolpix S620": 6.08, + "Nikon Coolpix S6200": 6.16, + "Nikon Coolpix S630": 6.08, + "Nikon Coolpix S6300": 6.16, + "Nikon Coolpix S640": 6.08, + "Nikon Coolpix S6400": 6.16, + "Nikon Coolpix S6500": 6.16, + "Nikon Coolpix S6600 ": 6.16, + "Nikon Coolpix S6700": 6.16, + "Nikon Coolpix S6800": 6.16, + "Nikon Coolpix S6900": 6.16, + "Nikon Coolpix S70": 6.16, + "Nikon Coolpix S700": 7.44, + "Nikon Coolpix S710": 7.44, + "Nikon Coolpix S7c": 5.75, + "Nikon Coolpix S80": 6.16, + "Nikon Coolpix S8000": 6.16, + "Nikon Coolpix S800c": 6.16, + "Nikon Coolpix S8100": 6.16, + "Nikon Coolpix S810c": 6.16, + "Nikon Coolpix S8200": 6.16, + "Nikon Coolpix S9": 5.75, + "Nikon Coolpix S9050": 6.16, + "Nikon Coolpix S9100": 6.16, + "Nikon Coolpix S9200": 6.16, + "Nikon Coolpix S9300": 6.16, + "Nikon Coolpix S9400": 6.16, + "Nikon Coolpix S9500": 6.16, + "Nikon Coolpix S9600": 6.16, + "Nikon Coolpix S9700": 6.16, + "Nikon Coolpix SQ": 5.33, + "Nikon D1": 23.7, + "Nikon D100": 23.7, + "Nikon D1H": 23.7, + "Nikon D1X": 23.7, + "Nikon D200": 23.6, + "Nikon D2H": 23.7, + "Nikon D2Hs": 23.2, + "Nikon D2X": 23.7, + "Nikon D2xs": 23.7, + "Nikon D3": 36.0, + "Nikon D300": 23.6, + "Nikon D3000": 23.6, + "Nikon D300s": 23.6, + "Nikon D3100": 23.1, + "Nikon D3200": 23.2, + "Nikon D3300": 23.5, + "Nikon D3X": 35.9, + "Nikon D3s": 36.0, + "Nikon D4": 36.0, + "Nikon D40": 23.7, + "Nikon D40x": 23.6, + "Nikon D4s": 36.0, + "Nikon D50": 23.7, + "Nikon D5000": 23.6, + "Nikon D5100": 23.6, + "Nikon D5200": 23.5, + "Nikon D5300": 23.5, + "Nikon D60": 23.6, + "Nikon D600": 35.9, + "Nikon D610": 35.9, + "Nikon D70": 23.7, + "Nikon D700": 36.0, + "Nikon D7000": 23.6, + "Nikon D70s": 23.7, + "Nikon D7100": 23.5, + "Nikon D750": 35.9, + "Nikon D80": 23.6, + "Nikon D800": 35.9, + "Nikon D800E": 35.9, + "Nikon D810": 35.9, + "Nikon D90": 23.6, + "Nikon Df": 36.0, + "Nikon E2Ns": 8.8, + "Nikon E2n": 8.8, + "Nikon E2s": 8.8, + "Nikon E3": 8.8, + "Nikon E3s": 8.8, + "Nokia 808 PureView": 10.82, + "Nokia Lumia 1020": 8.64, + "Nokia Lumia 920": 4.8, + "Olympus ": 3.2, + "Olympus E-300 / EVOLT E-300": 17.3, + "Olympus AZ-1": 5.33, + "Olympus AZ-1 Ferrari 2004": 5.33, + "Olympus AZ-2 Zoom": 5.33, + "Olympus C-1": 4.5, + "Olympus C-1 Zoom": 4.5, + "Olympus C-100": 4.5, + "Olympus C-1000L": 6.4, + "Olympus C-120": 4.5, + "Olympus C-1400L": 8.8, + "Olympus C-1400XL": 8.8, + "Olympus C-150": 4.5, + "Olympus C-160": 5.33, + "Olympus C-170": 5.75, + "Olympus C-180": 5.75, + "Olympus C-2": 5.33, + "Olympus C-200 Zoom": 5.33, + "Olympus C-2000 Zoom": 6.4, + "Olympus C-2020 Zoom": 6.4, + "Olympus C-2040 Zoom": 6.4, + "Olympus C-21": 6.4, + "Olympus C-2100 UZ": 6.4, + "Olympus C-220 Zoom": 4.5, + "Olympus C-2500 L": 8.8, + "Olympus C-300 Zoom": 5.75, + "Olympus C-3000 Zoom": 7.11, + "Olympus C-3020 Zoom": 7.11, + "Olympus C-3030 Zoom": 7.11, + "Olympus C-3040 Zoom": 7.11, + "Olympus C-310 Zoom": 5.33, + "Olympus C-315 Zoom": 5.75, + "Olympus C-350 Zoom": 5.75, + "Olympus C-360 Zoom": 5.75, + "Olympus C-370 Zoom": 5.33, + "Olympus C-40 Zoom": 7.11, + "Olympus C-4000 Zoom": 7.11, + "Olympus C-4040 Zoom": 7.11, + "Olympus C-450 Zoom": 5.75, + "Olympus C-460 Zoom del Sol": 5.75, + "Olympus C-470 Zoom": 5.75, + "Olympus C-480 Zoom": 5.75, + "Olympus C-50 Zoom": 7.11, + "Olympus C-5000 Zoom": 7.27, + "Olympus C-5050 Zoom": 7.11, + "Olympus C-5060 Wide Zoom": 7.11, + "Olympus C-55 Zoom": 7.11, + "Olympus C-5500 Sport Zoom": 7.11, + 
"Olympus C-60 Zoom": 7.27, + "Olympus C-70 Zoom": 7.11, + "Olympus C-700 UZ": 5.33, + "Olympus C-7000 Zoom": 7.11, + "Olympus C-7070 Wide Zoom": 7.11, + "Olympus C-720 UZ": 5.75, + "Olympus C-730 UZ": 5.33, + "Olympus C-740 UZ": 5.75, + "Olympus C-750 UZ": 5.33, + "Olympus C-760 UZ": 5.33, + "Olympus C-765 UZ": 5.75, + "Olympus C-770 UZ": 5.75, + "Olympus C-8080 Wide Zoom": 8.8, + "Olympus C-820L": 4.8, + "Olympus C-840L": 5.33, + "Olympus C-860L": 5.33, + "Olympus C-900 Zoom": 5.33, + "Olympus C-920 Zoom": 5.33, + "Olympus C-960 Zoom": 5.33, + "Olympus C-990 Zoom": 5.33, + "Olympus D-150Z": 4.5, + "Olympus D-200L": 8.8, + "Olympus D-300L": 8.8, + "Olympus D-340L": 8.8, + "Olympus D-340R": 6.4, + "Olympus D-360L": 7.11, + "Olympus D-370": 4.5, + "Olympus D-380": 4.5, + "Olympus D-390": 4.5, + "Olympus D-395": 5.33, + "Olympus D-40 Zoom": 7.11, + "Olympus D-400 Zoom": 6.4, + "Olympus D-425": 5.75, + "Olympus D-435": 5.75, + "Olympus D-450 Zoom": 5.33, + "Olympus D-460 Zoom": 7.11, + "Olympus D-490 Zoom": 5.33, + "Olympus D-500L": 8.8, + "Olympus D-510 Zoom": 5.33, + "Olympus D-520 Zoom": 4.5, + "Olympus D-535 Zoom": 5.33, + "Olympus D-540 Zoom": 5.33, + "Olympus D-545 Zoom": 5.75, + "Olympus D-560 Zoom": 5.75, + "Olympus D-580 Zoom": 5.33, + "Olympus D-595 Zoom": 5.75, + "Olympus D-600L": 8.8, + "Olympus D-620L": 8.8, + "Olympus D-630 Zoom": 5.75, + "Olympus E-1": 17.3, + "Olympus E-10": 8.8, + "Olympus E-100 RS": 6.4, + "Olympus E-20": 8.8, + "Olympus E-3": 17.3, + "Olympus E-30": 17.3, + "Olympus E-400": 17.3, + "Olympus E-410 / EVOLT E-410": 17.3, + "Olympus E-420": 17.3, + "Olympus E-450": 17.3, + "Olympus E-5": 17.3, + "Olympus E-500 / EVOLT E-500": 17.3, + "Olympus E-510 / EVOLT E-510": 17.3, + "Olympus E-520": 17.3, + "Olympus E-600": 17.3, + "Olympus E-620": 17.3, + "Olympus FE-100": 5.75, + "Olympus FE-110": 5.75, + "Olympus FE-115": 5.75, + "Olympus FE-120": 5.75, + "Olympus FE-130": 5.75, + "Olympus FE-140": 5.75, + "Olympus FE-150": 5.75, + "Olympus FE-160": 5.75, + "Olympus FE-170": 5.75, + "Olympus FE-180": 5.75, + "Olympus FE-190": 5.75, + "Olympus FE-20": 6.03, + "Olympus FE-200": 5.75, + "Olympus FE-210": 5.75, + "Olympus FE-220": 5.75, + "Olympus FE-230": 5.75, + "Olympus FE-240": 5.75, + "Olympus FE-25": 6.08, + "Olympus FE-250": 7.11, + "Olympus FE-26": 6.08, + "Olympus FE-270": 5.75, + "Olympus FE-280": 6.16, + "Olympus FE-290": 5.75, + "Olympus FE-300": 7.44, + "Olympus FE-3000": 6.08, + "Olympus FE-3010": 6.08, + "Olympus FE-310": 5.75, + "Olympus FE-340": 6.03, + "Olympus FE-350": 5.75, + "Olympus FE-360": 6.03, + "Olympus FE-370": 6.03, + "Olympus FE-4000": 6.16, + "Olympus FE-4010": 6.16, + "Olympus FE-4020": 6.16, + "Olympus FE-4030": 6.08, + "Olympus FE-4040": 6.08, + "Olympus FE-4050": 6.16, + "Olympus FE-45": 6.08, + "Olympus FE-47": 6.08, + "Olympus FE-48": 6.16, + "Olympus FE-5000": 6.08, + "Olympus FE-5010": 6.08, + "Olympus FE-5020": 6.16, + "Olympus FE-5030": 6.08, + "Olympus FE-5035": 6.08, + "Olympus FE-5040": 6.16, + "Olympus FE-5050": 6.16, + "Olympus IR 500": 5.33, + "Olympus IR-300": 5.75, + "Olympus Mju 1060": 6.08, + "Olympus Mju 5000": 6.08, + "Olympus Mju 7050": 6.16, + "Olympus OM-D E-M1": 17.3, + "Olympus OM-D E-M10": 17.3, + "Olympus OM-D E-M5": 17.3, + "Olympus PEN E-P1": 17.3, + "Olympus PEN E-P2": 17.3, + "Olympus PEN E-P3": 17.3, + "Olympus PEN E-P5": 17.3, + "Olympus PEN E-PL1": 17.3, + "Olympus PEN E-PL1s": 17.3, + "Olympus PEN E-PL2": 17.3, + "Olympus PEN E-PL3": 17.3, + "Olympus PEN E-PL5": 17.3, + "Olympus PEN E-PL6": 17.3, + "Olympus 
PEN E-PL7": 17.3, + "Olympus PEN E-PM1": 17.3, + "Olympus PEN E-PM2": 17.3, + "Olympus SH-21": 6.16, + "Olympus SH-25MR": 6.16, + "Olympus SH-50 iHS": 6.16, + "Olympus SP 310": 7.11, + "Olympus SP 320": 7.11, + "Olympus SP 350": 7.11, + "Olympus SP 500 UZ": 5.75, + "Olympus SP 510 UZ": 5.75, + "Olympus SP 550 UZ": 5.75, + "Olympus SP 560 UZ": 6.16, + "Olympus SP 570 UZ": 6.08, + "Olympus SP 590 UZ": 6.08, + "Olympus SP 600 UZ": 6.08, + "Olympus SP 700": 5.75, + "Olympus SP 800 UZ": 6.08, + "Olympus SP 810 UZ": 6.16, + "Olympus SP-100": 6.16, + "Olympus SP-565UZ": 6.08, + "Olympus SP-610UZ": 6.16, + "Olympus SP-620 UZ": 6.16, + "Olympus SP-720UZ": 6.16, + "Olympus SZ-10": 6.16, + "Olympus SZ-11": 6.16, + "Olympus SZ-12": 6.16, + "Olympus SZ-14": 6.16, + "Olympus SZ-15": 6.16, + "Olympus SZ-16": 6.16, + "Olympus SZ-20": 6.16, + "Olympus SZ-30MR": 6.16, + "Olympus SZ-31MR iHS": 6.16, + "Olympus Stylus 1": 7.53, + "Olympus Stylus 1000": 7.11, + "Olympus Stylus 1010": 6.08, + "Olympus Stylus 1020": 6.08, + "Olympus Stylus 1030 SW": 6.08, + "Olympus Stylus 1040": 6.08, + "Olympus Stylus 1050 SW": 6.08, + "Olympus Stylus 1200": 7.44, + "Olympus Stylus 300": 5.75, + "Olympus Stylus 400": 5.75, + "Olympus Stylus 410": 5.75, + "Olympus Stylus 500": 5.75, + "Olympus Stylus 5010": 6.08, + "Olympus Stylus 550WP": 6.08, + "Olympus Stylus 600": 5.75, + "Olympus Stylus 700": 6.16, + "Olympus Stylus 7000": 6.08, + "Olympus Stylus 7010": 6.08, + "Olympus Stylus 7030": 6.08, + "Olympus Stylus 7040": 6.08, + "Olympus Stylus 720 SW": 6.16, + "Olympus Stylus 725 SW": 6.16, + "Olympus Stylus 730": 6.16, + "Olympus Stylus 740": 6.16, + "Olympus Stylus 750": 6.16, + "Olympus Stylus 760": 6.16, + "Olympus Stylus 770 SW": 6.16, + "Olympus Stylus 780": 6.16, + "Olympus Stylus 790 SW": 6.16, + "Olympus Stylus 800": 7.11, + "Olympus Stylus 810": 7.11, + "Olympus Stylus 820": 6.16, + "Olympus Stylus 830": 6.16, + "Olympus Stylus 840": 6.03, + "Olympus Stylus 850 SW": 6.03, + "Olympus Stylus 9000": 6.08, + "Olympus Stylus 9010": 6.08, + "Olympus Stylus SH-1": 6.16, + "Olympus Stylus SP-820UZ": 6.16, + "Olympus Stylus Tough 6000": 6.16, + "Olympus Stylus Tough 6010": 6.16, + "Olympus Stylus Tough 6020": 6.08, + "Olympus Stylus Tough 8000": 6.08, + "Olympus Stylus Tough 8010": 6.08, + "Olympus Stylus Tough-3000": 6.08, + "Olympus Stylus Verve": 5.75, + "Olympus Stylus Verve S": 5.75, + "Olympus Stylus XZ-10": 6.16, + "Olympus T-10": 6.16, + "Olympus T-100": 6.16, + "Olympus T-110": 6.16, + "Olympus TG-310": 6.16, + "Olympus TG-320": 6.16, + "Olympus TG-610": 6.16, + "Olympus TG-630 iHS": 6.16, + "Olympus TG-810": 6.16, + "Olympus TG-820 iHS": 6.16, + "Olympus TG-830 iHS": 6.16, + "Olympus TG-850 iHS": 6.16, + "Olympus Tough TG-1 iHS": 6.16, + "Olympus Tough TG-2 iHS": 6.16, + "Olympus Tough TG-3": 6.16, + "Olympus Tough TG-620": 6.16, + "Olympus VG-110": 6.16, + "Olympus VG-120": 6.16, + "Olympus VG-130": 6.16, + "Olympus VG-145": 6.16, + "Olympus VG-150": 6.16, + "Olympus VG-160": 6.16, + "Olympus VG-165": 6.16, + "Olympus VG-170": 6.16, + "Olympus VG-180": 6.16, + "Olympus VG-190": 6.16, + "Olympus VH-210": 6.16, + "Olympus VH-410": 6.16, + "Olympus VH-510": 6.16, + "Olympus VH-515": 6.16, + "Olympus VH-520": 6.16, + "Olympus VR-310": 6.16, + "Olympus VR-320": 6.16, + "Olympus VR-330": 6.16, + "Olympus VR-340": 6.16, + "Olympus VR-350": 6.16, + "Olympus VR-360": 6.16, + "Olympus VR-370": 6.16, + "Olympus X-15": 6.03, + "Olympus X-775": 5.75, + "Olympus X-785": 5.75, + "Olympus X-905": 6.16, + "Olympus X-920": 6.16, + 
"Olympus XZ-1": 7.85, + "Olympus XZ-2 iHS": 7.53, + "Olympus mju 400 Digital Ferrari": 5.75, + "Olympus mju 800 black": 7.11, + "Olympus mju mini Digital": 5.75, + "Olympus mju mini Digital S": 5.75, + "Panasonic Lumix DMC-LZ20": 6.08, + "Panasonic D-snap SV-AS10": 4.5, + "Panasonic D-snap SV-AS3": 4.5, + "Panasonic D-snap SV-AS30": 4.5, + "Panasonic Lumix DMC-3D1": 6.16, + "Panasonic Lumix DMC-F1": 5.75, + "Panasonic Lumix DMC-F3": 6.08, + "Panasonic Lumix DMC-F5": 6.08, + "Panasonic Lumix DMC-F7": 5.33, + "Panasonic Lumix DMC-FH1": 6.08, + "Panasonic Lumix DMC-FH10": 6.08, + "Panasonic Lumix DMC-FH2": 6.08, + "Panasonic Lumix DMC-FH20": 6.08, + "Panasonic Lumix DMC-FH22": 6.08, + "Panasonic Lumix DMC-FH25": 6.16, + "Panasonic Lumix DMC-FH27": 6.16, + "Panasonic Lumix DMC-FH3": 6.08, + "Panasonic Lumix DMC-FH4": 6.08, + "Panasonic Lumix DMC-FH5": 6.08, + "Panasonic Lumix DMC-FH6": 6.08, + "Panasonic Lumix DMC-FH7": 6.08, + "Panasonic Lumix DMC-FH8": 6.08, + "Panasonic Lumix DMC-FP1": 6.08, + "Panasonic Lumix DMC-FP2": 6.08, + "Panasonic Lumix DMC-FP3": 6.08, + "Panasonic Lumix DMC-FP5": 6.08, + "Panasonic Lumix DMC-FP7": 6.08, + "Panasonic Lumix DMC-FP8": 6.08, + "Panasonic Lumix DMC-FS10": 6.08, + "Panasonic Lumix DMC-FS11": 6.08, + "Panasonic Lumix DMC-FS12": 6.08, + "Panasonic Lumix DMC-FS15": 6.08, + "Panasonic Lumix DMC-FS16": 6.08, + "Panasonic Lumix DMC-FS18": 6.08, + "Panasonic Lumix DMC-FS2": 5.75, + "Panasonic Lumix DMC-FS20": 6.08, + "Panasonic Lumix DMC-FS22": 6.08, + "Panasonic Lumix DMC-FS25": 6.08, + "Panasonic Lumix DMC-FS28": 6.08, + "Panasonic Lumix DMC-FS3": 5.75, + "Panasonic Lumix DMC-FS30": 6.08, + "Panasonic Lumix DMC-FS33": 6.08, + "Panasonic Lumix DMC-FS35": 6.16, + "Panasonic Lumix DMC-FS37": 6.16, + "Panasonic Lumix DMC-FS40": 6.08, + "Panasonic Lumix DMC-FS42": 5.75, + "Panasonic Lumix DMC-FS45": 6.08, + "Panasonic Lumix DMC-FS5": 6.08, + "Panasonic Lumix DMC-FS6": 5.75, + "Panasonic Lumix DMC-FS62": 5.75, + "Panasonic Lumix DMC-FS7": 5.75, + "Panasonic Lumix DMC-FT1": 6.08, + "Panasonic Lumix DMC-FT10": 6.08, + "Panasonic Lumix DMC-FT2": 6.08, + "Panasonic Lumix DMC-FT20": 6.08, + "Panasonic Lumix DMC-FT3": 6.08, + "Panasonic Lumix DMC-FT4": 6.08, + "Panasonic Lumix DMC-FX01": 5.75, + "Panasonic Lumix DMC-FX07": 5.75, + "Panasonic Lumix DMC-FX1": 5.75, + "Panasonic Lumix DMC-FX10": 5.75, + "Panasonic Lumix DMC-FX100": 7.44, + "Panasonic Lumix DMC-FX12": 5.75, + "Panasonic Lumix DMC-FX150": 7.44, + "Panasonic Lumix DMC-FX2": 5.75, + "Panasonic Lumix DMC-FX3": 5.75, + "Panasonic Lumix DMC-FX30": 5.75, + "Panasonic Lumix DMC-FX33": 5.75, + "Panasonic Lumix DMC-FX35": 6.08, + "Panasonic Lumix DMC-FX37": 6.08, + "Panasonic Lumix DMC-FX40": 6.08, + "Panasonic Lumix DMC-FX48": 6.08, + "Panasonic Lumix DMC-FX5": 5.75, + "Panasonic Lumix DMC-FX50": 5.75, + "Panasonic Lumix DMC-FX500": 6.08, + "Panasonic Lumix DMC-FX55": 5.75, + "Panasonic Lumix DMC-FX550": 6.08, + "Panasonic Lumix DMC-FX580": 6.08, + "Panasonic Lumix DMC-FX60": 6.08, + "Panasonic Lumix DMC-FX65": 6.08, + "Panasonic Lumix DMC-FX66": 6.08, + "Panasonic Lumix DMC-FX68": 6.08, + "Panasonic Lumix DMC-FX7": 5.75, + "Panasonic Lumix DMC-FX70": 6.08, + "Panasonic Lumix DMC-FX700": 6.08, + "Panasonic Lumix DMC-FX75": 6.08, + "Panasonic Lumix DMC-FX77": 6.08, + "Panasonic Lumix DMC-FX78": 6.08, + "Panasonic Lumix DMC-FX8": 5.75, + "Panasonic Lumix DMC-FX80": 6.08, + "Panasonic Lumix DMC-FX9": 5.75, + "Panasonic Lumix DMC-FX90": 6.08, + "Panasonic Lumix DMC-FZ1": 4.5, + "Panasonic Lumix DMC-FZ10": 5.75, + 
"Panasonic Lumix DMC-FZ100": 6.08, + "Panasonic Lumix DMC-FZ1000": 13.2, + "Panasonic Lumix DMC-FZ15": 5.75, + "Panasonic Lumix DMC-FZ150": 6.08, + "Panasonic Lumix DMC-FZ18": 5.75, + "Panasonic Lumix DMC-FZ2": 4.5, + "Panasonic Lumix DMC-FZ20": 5.75, + "Panasonic Lumix DMC-FZ200": 6.16, + "Panasonic Lumix DMC-FZ28": 6.08, + "Panasonic Lumix DMC-FZ3": 4.5, + "Panasonic Lumix DMC-FZ30": 7.11, + "Panasonic Lumix DMC-FZ35": 6.08, + "Panasonic Lumix DMC-FZ38": 6.08, + "Panasonic Lumix DMC-FZ4": 5.75, + "Panasonic Lumix DMC-FZ40": 6.08, + "Panasonic Lumix DMC-FZ45": 6.08, + "Panasonic Lumix DMC-FZ47": 6.08, + "Panasonic Lumix DMC-FZ48": 6.08, + "Panasonic Lumix DMC-FZ5": 5.75, + "Panasonic Lumix DMC-FZ50": 7.11, + "Panasonic Lumix DMC-FZ60": 6.08, + "Panasonic Lumix DMC-FZ7": 5.75, + "Panasonic Lumix DMC-FZ70": 6.16, + "Panasonic Lumix DMC-FZ8": 5.75, + "Panasonic Lumix DMC-G1": 17.3, + "Panasonic Lumix DMC-G10": 17.3, + "Panasonic Lumix DMC-G2": 17.3, + "Panasonic Lumix DMC-G3": 17.3, + "Panasonic Lumix DMC-G5": 17.3, + "Panasonic Lumix DMC-G6": 17.3, + "Panasonic Lumix DMC-GF1": 17.3, + "Panasonic Lumix DMC-GF2": 17.3, + "Panasonic Lumix DMC-GF3": 17.3, + "Panasonic Lumix DMC-GF5": 17.3, + "Panasonic Lumix DMC-GF6": 17.3, + "Panasonic Lumix DMC-GH1": 17.3, + "Panasonic Lumix DMC-GH2": 17.3, + "Panasonic Lumix DMC-GH3": 17.3, + "Panasonic Lumix DMC-GH4": 17.3, + "Panasonic Lumix DMC-GM1": 17.3, + "Panasonic Lumix DMC-GM5": 17.3, + "Panasonic Lumix DMC-GX1": 17.3, + "Panasonic Lumix DMC-GX7": 17.3, + "Panasonic Lumix DMC-L1": 17.3, + "Panasonic Lumix DMC-L10": 17.3, + "Panasonic Lumix DMC-LC1": 8.8, + "Panasonic Lumix DMC-LC20": 5.33, + "Panasonic Lumix DMC-LC33": 5.75, + "Panasonic Lumix DMC-LC40": 7.53, + "Panasonic Lumix DMC-LC43": 5.75, + "Panasonic Lumix DMC-LC5": 7.53, + "Panasonic Lumix DMC-LC50": 5.75, + "Panasonic Lumix DMC-LC70": 5.75, + "Panasonic Lumix DMC-LC80": 5.75, + "Panasonic Lumix DMC-LF1": 7.53, + "Panasonic Lumix DMC-LS1": 5.75, + "Panasonic Lumix DMC-LS2": 5.75, + "Panasonic Lumix DMC-LS3": 5.75, + "Panasonic Lumix DMC-LS5": 6.08, + "Panasonic Lumix DMC-LS6": 6.08, + "Panasonic Lumix DMC-LS60": 5.75, + "Panasonic Lumix DMC-LS75": 5.75, + "Panasonic Lumix DMC-LS80": 5.75, + "Panasonic Lumix DMC-LS85": 5.75, + "Panasonic Lumix DMC-LX1": 7.76, + "Panasonic Lumix DMC-LX100": 17.3, + "Panasonic Lumix DMC-LX2": 7.76, + "Panasonic Lumix DMC-LX3": 7.85, + "Panasonic Lumix DMC-LX5": 7.85, + "Panasonic Lumix DMC-LX7": 7.53, + "Panasonic Lumix DMC-LZ1": 5.75, + "Panasonic Lumix DMC-LZ10": 6.08, + "Panasonic Lumix DMC-LZ2": 5.75, + "Panasonic Lumix DMC-LZ3": 5.75, + "Panasonic Lumix DMC-LZ30": 6.16, + "Panasonic Lumix DMC-LZ40": 6.16, + "Panasonic Lumix DMC-LZ5": 5.75, + "Panasonic Lumix DMC-LZ6": 5.75, + "Panasonic Lumix DMC-LZ7": 5.75, + "Panasonic Lumix DMC-LZ8": 5.75, + "Panasonic Lumix DMC-S1": 6.08, + "Panasonic Lumix DMC-S2": 6.08, + "Panasonic Lumix DMC-S3": 6.08, + "Panasonic Lumix DMC-S5": 6.08, + "Panasonic Lumix DMC-SZ1": 6.08, + "Panasonic Lumix DMC-SZ3": 6.08, + "Panasonic Lumix DMC-SZ5": 6.08, + "Panasonic Lumix DMC-SZ7": 6.08, + "Panasonic Lumix DMC-SZ8": 6.08, + "Panasonic Lumix DMC-SZ9": 6.08, + "Panasonic Lumix DMC-TS1": 6.08, + "Panasonic Lumix DMC-TS10": 6.08, + "Panasonic Lumix DMC-TS2": 6.08, + "Panasonic Lumix DMC-TS20": 6.08, + "Panasonic Lumix DMC-TS25": 6.08, + "Panasonic Lumix DMC-TS3": 6.08, + "Panasonic Lumix DMC-TS4": 6.08, + "Panasonic Lumix DMC-TS5": 6.08, + "Panasonic Lumix DMC-TZ1": 5.75, + "Panasonic Lumix DMC-TZ10": 6.08, + "Panasonic Lumix 
DMC-TZ18": 6.08, + "Panasonic Lumix DMC-TZ2": 6.08, + "Panasonic Lumix DMC-TZ20": 6.08, + "Panasonic Lumix DMC-TZ22": 6.08, + "Panasonic Lumix DMC-TZ25": 6.16, + "Panasonic Lumix DMC-TZ3": 6.08, + "Panasonic Lumix DMC-TZ30": 6.08, + "Panasonic Lumix DMC-TZ31": 6.08, + "Panasonic Lumix DMC-TZ4": 5.75, + "Panasonic Lumix DMC-TZ5": 6.08, + "Panasonic Lumix DMC-TZ50": 6.08, + "Panasonic Lumix DMC-TZ6": 5.75, + "Panasonic Lumix DMC-TZ7": 6.08, + "Panasonic Lumix DMC-XS1": 6.08, + "Panasonic Lumix DMC-XS3": 6.08, + "Panasonic Lumix DMC-ZR1": 6.08, + "Panasonic Lumix DMC-ZR3": 6.08, + "Panasonic Lumix DMC-ZS1": 5.75, + "Panasonic Lumix DMC-ZS10": 6.08, + "Panasonic Lumix DMC-ZS15": 6.16, + "Panasonic Lumix DMC-ZS20": 6.08, + "Panasonic Lumix DMC-ZS25": 6.08, + "Panasonic Lumix DMC-ZS3": 6.08, + "Panasonic Lumix DMC-ZS30": 6.16, + "Panasonic Lumix DMC-ZS35 / TZ55": 6.08, + "Panasonic Lumix DMC-ZS40 / TZ60": 6.16, + "Panasonic Lumix DMC-ZS5": 6.08, + "Panasonic Lumix DMC-ZS7": 6.08, + "Panasonic Lumix DMC-ZS8": 6.08, + "Panasonic Lumix DMC-ZX1": 6.08, + "Panasonic Lumix DMC-ZX3": 6.08, + "Panasonic PV DC3000": 7.11, + "Pentax *ist D": 23.5, + "Pentax *ist DL": 23.5, + "Pentax *ist DL2": 23.5, + "Pentax *ist DS": 23.5, + "Pentax *ist DS2": 23.5, + "Pentax 645D": 44.0, + "Pentax 645Z": 44.0, + "Pentax EI-100": 4.5, + "Pentax EI-200": 5.33, + "Pentax EI-2000": 8.8, + "Pentax Efina": 6.16, + "Pentax K-01": 23.7, + "Pentax K-3": 23.5, + "Pentax K-30": 23.7, + "Pentax K-5": 23.6, + "Pentax K-5 II": 23.7, + "Pentax K-50": 23.7, + "Pentax K-500": 23.7, + "Pentax K-7": 23.4, + "Pentax K-S1": 23.5, + "Pentax K-m": 23.5, + "Pentax K-r": 23.6, + "Pentax K-x": 23.6, + "Pentax K100D": 23.5, + "Pentax K100D Super": 23.5, + "Pentax K10D": 23.5, + "Pentax K110D": 23.7, + "Pentax K200D": 23.5, + "Pentax K20D": 23.4, + "Pentax MX-1": 7.53, + "Pentax Optio 230": 5.33, + "Pentax Optio 30": 5.33, + "Pentax Optio 330": 7.11, + "Pentax Optio 330GS": 5.33, + "Pentax Optio 330RS": 7.11, + "Pentax Optio 33L": 5.33, + "Pentax Optio 33LF": 5.33, + "Pentax Optio 33WR": 5.33, + "Pentax Optio 430": 7.11, + "Pentax Optio 430RS": 7.11, + "Pentax Optio 43WR": 5.33, + "Pentax Optio 450": 7.11, + "Pentax Optio 50": 5.75, + "Pentax Optio 50L": 5.75, + "Pentax Optio 550": 7.11, + "Pentax Optio 555": 7.11, + "Pentax Optio 60": 7.11, + "Pentax Optio 750Z": 7.11, + "Pentax Optio A10": 7.11, + "Pentax Optio A20": 5.75, + "Pentax Optio A30": 7.11, + "Pentax Optio A40": 7.53, + "Pentax Optio E10": 5.75, + "Pentax Optio E20": 5.75, + "Pentax Optio E25": 5.75, + "Pentax Optio E30": 5.75, + "Pentax Optio E40": 5.75, + "Pentax Optio E50": 5.75, + "Pentax Optio E60": 6.08, + "Pentax Optio E70": 6.16, + "Pentax Optio E70L": 6.16, + "Pentax Optio E75": 6.08, + "Pentax Optio E80": 6.08, + "Pentax Optio E85": 6.16, + "Pentax Optio E90": 6.16, + "Pentax Optio H90": 6.16, + "Pentax Optio I-10": 6.16, + "Pentax Optio L20": 5.75, + "Pentax Optio L50": 6.03, + "Pentax Optio LS1000": 6.16, + "Pentax Optio LS1100": 6.08, + "Pentax Optio LS465": 23.7, + "Pentax Optio M10": 5.75, + "Pentax Optio M20": 5.75, + "Pentax Optio M30": 5.75, + "Pentax Optio M40": 6.03, + "Pentax Optio M50": 6.03, + "Pentax Optio M60": 6.16, + "Pentax Optio M85": 6.16, + "Pentax Optio M90": 6.08, + "Pentax Optio MX": 5.33, + "Pentax Optio MX4": 5.33, + "Pentax Optio P70": 6.16, + "Pentax Optio P80": 6.16, + "Pentax Optio RS1000": 6.16, + "Pentax Optio RS1500": 6.08, + "Pentax Optio RZ10": 6.08, + "Pentax Optio RZ18": 6.08, + "Pentax Optio S": 5.75, + "Pentax Optio S1": 6.16, + "Pentax 
Optio S10": 7.11, + "Pentax Optio S12": 7.53, + "Pentax Optio S30": 5.33, + "Pentax Optio S4": 5.75, + "Pentax Optio S40": 5.75, + "Pentax Optio S45": 5.75, + "Pentax Optio S4i": 5.75, + "Pentax Optio S50": 5.75, + "Pentax Optio S55": 5.75, + "Pentax Optio S5i": 5.75, + "Pentax Optio S5n": 5.75, + "Pentax Optio S5z": 5.75, + "Pentax Optio S6": 5.75, + "Pentax Optio S60": 5.75, + "Pentax Optio S7": 5.75, + "Pentax Optio SV": 5.75, + "Pentax Optio SVi": 5.75, + "Pentax Optio T10": 5.75, + "Pentax Optio T20": 5.75, + "Pentax Optio T30": 5.75, + "Pentax Optio V10": 6.03, + "Pentax Optio V20": 6.03, + "Pentax Optio VS20": 6.08, + "Pentax Optio W10": 5.75, + "Pentax Optio W20": 5.75, + "Pentax Optio W30": 5.75, + "Pentax Optio W60": 6.16, + "Pentax Optio W80": 6.16, + "Pentax Optio W90": 6.16, + "Pentax Optio WG-1": 6.16, + "Pentax Optio WG-1 GPS": 6.16, + "Pentax Optio WG-2": 6.16, + "Pentax Optio WG-2 GPS": 6.16, + "Pentax Optio WP": 5.75, + "Pentax Optio WPi": 5.75, + "Pentax Optio WS80": 6.08, + "Pentax Optio X": 5.75, + "Pentax Optio Z10": 5.75, + "Pentax Q": 6.16, + "Pentax Q-S1": 7.53, + "Pentax Q10": 6.16, + "Pentax Q7": 7.53, + "Pentax WG-10": 6.16, + "Pentax WG-3": 6.16, + "Pentax X-5": 6.08, + "Pentax X70": 6.08, + "Pentax X90": 6.08, + "Pentax XG-1": 6.08, + "Phase One iXM-RS100F": 53.4, + "Praktica DC 20": 6.4, + "Praktica DC 21": 5.33, + "Praktica DC 22": 5.33, + "Praktica DC 32": 5.33, + "Praktica DC 34": 5.33, + "Praktica DC 42": 5.75, + "Praktica DC 44": 5.75, + "Praktica DC 50": 5.75, + "Praktica DC 52": 5.75, + "Praktica DC 60": 5.75, + "Praktica DC Slim 2": 6.4, + "Praktica DC Slim 5": 7.11, + "Praktica DC440": 5.75, + "Praktica DCZ 1.3": 6.4, + "Praktica DCZ 10.1": 7.11, + "Praktica DCZ 10.2": 6.16, + "Praktica DCZ 10.3": 6.08, + "Praktica DCZ 10.4": 6.16, + "Praktica DCZ 12.1": 6.16, + "Praktica DCZ 12.Z4": 6.16, + "Praktica DCZ 14.1": 6.16, + "Praktica DCZ 14.2": 6.16, + "Praktica DCZ 2.0": 4.8, + "Praktica DCZ 2.1": 4.8, + "Praktica DCZ 2.1 S": 4.5, + "Praktica DCZ 2.2": 5.33, + "Praktica DCZ 2.2 S": 6.4, + "Praktica DCZ 3.0": 6.4, + "Praktica DCZ 3.2": 7.11, + "Praktica DCZ 3.2D": 6.4, + "Praktica DCZ 3.2S": 6.4, + "Praktica DCZ 3.3": 7.11, + "Praktica DCZ 3.4": 5.75, + "Praktica DCZ 3.5": 5.75, + "Praktica DCZ 4.1": 7.11, + "Praktica DCZ 4.2": 7.11, + "Praktica DCZ 4.3": 7.11, + "Praktica DCZ 4.4": 5.75, + "Praktica DCZ 5.1": 7.11, + "Praktica DCZ 5.2": 7.11, + "Praktica DCZ 5.3": 5.75, + "Praktica DCZ 5.4": 5.75, + "Praktica DCZ 5.5": 5.75, + "Praktica DCZ 5.8": 5.75, + "Praktica DCZ 6.1": 5.75, + "Praktica DCZ 6.2": 5.75, + "Praktica DCZ 6.3": 5.75, + "Praktica DCZ 6.8": 5.75, + "Praktica DCZ 7.1": 5.75, + "Praktica DCZ 7.2": 5.75, + "Praktica DCZ 7.3": 5.75, + "Praktica DCZ 7.4": 5.75, + "Praktica DCZ 8.1": 5.75, + "Praktica DCZ 8.2": 5.75, + "Praktica DCZ 8.3": 5.75, + "Praktica DMMC": 4.8, + "Praktica DMMC 4": 4.8, + "Praktica DVC 6.1": 5.75, + "Praktica Digi 3": 6.4, + "Praktica Digi 3 LM": 6.4, + "Praktica Digi 30": 6.4, + "Praktica Digicam 3": 6.4, + "Praktica Dpix 1000z": 6.16, + "Praktica Dpix 1100z": 5.75, + "Praktica Dpix 1220z": 6.08, + "Praktica Dpix 3000": 6.4, + "Praktica Dpix 3200": 4.8, + "Praktica Dpix 3300": 4.8, + "Praktica Dpix 5000 WP": 4.5, + "Praktica Dpix 5100": 5.75, + "Praktica Dpix 510Z": 5.75, + "Praktica Dpix 5200": 5.75, + "Praktica Dpix 530Z": 5.75, + "Praktica Dpix 740z": 5.75, + "Praktica Dpix 750z": 5.75, + "Praktica Dpix 810z": 5.75, + "Praktica Dpix 820z": 5.75, + "Praktica Dpix 9000": 6.16, + "Praktica Dpix 910z": 6.4, + "Praktica 
Exakta DC 4200": 7.11, + "Praktica G2.0": 6.4, + "Praktica G3.2": 6.4, + "Praktica Luxmedia 10 TS": 6.16, + "Praktica Luxmedia 10 X3": 7.11, + "Praktica Luxmedia 10 XS": 5.75, + "Praktica Luxmedia 10-03": 6.16, + "Praktica Luxmedia 10-23": 6.16, + "Praktica Luxmedia 12 HD": 7.44, + "Praktica Luxmedia 12 TS": 6.16, + "Praktica Luxmedia 12 XS": 5.75, + "Praktica Luxmedia 12-03": 5.75, + "Praktica Luxmedia 12-04": 6.16, + "Praktica Luxmedia 12-23": 6.16, + "Praktica Luxmedia 12-Z4": 6.16, + "Praktica Luxmedia 12-Z4TS": 6.16, + "Praktica Luxmedia 12-Z5": 6.16, + "Praktica Luxmedia 14-Z50S": 6.16, + "Praktica Luxmedia 14-Z51": 6.16, + "Praktica Luxmedia 14-Z80S": 6.16, + "Praktica Luxmedia 16-Z12S": 6.16, + "Praktica Luxmedia 16-Z21C": 6.16, + "Praktica Luxmedia 16-Z21S": 6.16, + "Praktica Luxmedia 16-Z24S": 6.16, + "Praktica Luxmedia 16-Z51": 6.16, + "Praktica Luxmedia 16-Z52": 6.16, + "Praktica Luxmedia 18-Z36C": 6.16, + "Praktica Luxmedia 4008": 5.75, + "Praktica Luxmedia 5003": 7.11, + "Praktica Luxmedia 5008": 5.75, + "Praktica Luxmedia 5103": 7.11, + "Praktica Luxmedia 5203": 5.75, + "Praktica Luxmedia 5303": 5.75, + "Praktica Luxmedia 6103": 7.11, + "Praktica Luxmedia 6105": 5.75, + "Praktica Luxmedia 6203": 5.75, + "Praktica Luxmedia 6403": 5.75, + "Praktica Luxmedia 6503": 5.75, + "Praktica Luxmedia 7103": 5.75, + "Praktica Luxmedia 7105": 5.75, + "Praktica Luxmedia 7203": 5.75, + "Praktica Luxmedia 7303": 5.75, + "Praktica Luxmedia 7403": 5.75, + "Praktica Luxmedia 8003": 7.11, + "Praktica Luxmedia 8203": 5.75, + "Praktica Luxmedia 8213": 5.75, + "Praktica Luxmedia 8303": 5.75, + "Praktica Luxmedia 8403": 5.75, + "Praktica Luxmedia 8503": 5.75, + "Praktica Mini": 6.4, + "Praktica V2.1": 6.4, + "Praktica V3.2": 6.4, + "Ricoh CX1": 6.16, + "Ricoh CX2": 6.16, + "Ricoh CX3": 6.16, + "Ricoh CX4": 6.16, + "Ricoh CX5": 6.16, + "Ricoh CX6": 6.16, + "Ricoh Caplio 400G Wide": 5.33, + "Ricoh Caplio 500G": 7.11, + "Ricoh Caplio 500G Wide": 7.11, + "Ricoh Caplio 500SE": 7.11, + "Ricoh Caplio G3": 5.33, + "Ricoh Caplio G3s": 5.33, + "Ricoh Caplio GX": 7.11, + "Ricoh Caplio GX100": 7.31, + "Ricoh Caplio GX200": 7.53, + "Ricoh Caplio GX8": 7.11, + "Ricoh Caplio R1": 7.11, + "Ricoh Caplio R1V": 5.75, + "Ricoh Caplio R2": 5.75, + "Ricoh Caplio R2S": 5.75, + "Ricoh Caplio R3": 5.75, + "Ricoh Caplio R30": 5.75, + "Ricoh Caplio R4": 5.75, + "Ricoh Caplio R40": 5.75, + "Ricoh Caplio R5": 5.75, + "Ricoh Caplio R6": 5.75, + "Ricoh Caplio R7": 5.75, + "Ricoh Caplio R8": 6.16, + "Ricoh Caplio RR1": 7.11, + "Ricoh Caplio RR10": 5.33, + "Ricoh Caplio RR120": 4.5, + "Ricoh Caplio RR230": 4.5, + "Ricoh Caplio RR30": 5.33, + "Ricoh Caplio RR330": 5.75, + "Ricoh Caplio RR530": 5.75, + "Ricoh Caplio RR630": 7.11, + "Ricoh Caplio RR660": 5.75, + "Ricoh Caplio RR750": 5.75, + "Ricoh Caplio RR770": 5.75, + "Ricoh Caplio RX": 5.33, + "Ricoh Caplio RZ1": 5.75, + "Ricoh G600": 6.16, + "Ricoh G700": 6.16, + "Ricoh G700SE": 6.16, + "Ricoh G800": 6.16, + "Ricoh GR": 23.6, + "Ricoh GR Digital": 7.11, + "Ricoh GR Digital 3": 7.53, + "Ricoh GR Digital 4": 7.53, + "Ricoh GR Digital II": 7.31, + "Ricoh GX200": 7.53, + "Ricoh GXR A12 50mm F2.5 Macro": 23.6, + "Ricoh GXR A16 24-85mm F3.5-5.5": 23.6, + "Ricoh GXR GR Lens A12 28mm F2.5": 23.6, + "Ricoh GXR Mount A12": 23.6, + "Ricoh GXR P10 28-300mm F3.5-5.6 VC": 6.16, + "Ricoh GXR S10 24-72mm F2.5-4.4 VC": 7.53, + "Ricoh HZ15": 6.16, + "Ricoh PX": 6.16, + "Ricoh R10": 6.16, + "Ricoh R50": 6.08, + "Ricoh R8": 6.16, + "Ricoh RDC-200G": 6.4, + "Ricoh RDC-4300": 4.8, + "Ricoh RDC-5000": 
5.33, + "Ricoh RDC-5300": 5.33, + "Ricoh RDC-6000": 6.4, + "Ricoh RDC-7": 7.11, + "Ricoh RDC-i500": 7.11, + "Ricoh RDC-i700": 7.11, + "Ricoh WG-20": 6.16, + "Ricoh WG-4": 6.16, + "Ricoh WG-M1": 6.16, + "Rollei Compactline 100": 6.16, + "Rollei Compactline 101": 5.75, + "Rollei Compactline 102": 5.75, + "Rollei Compactline 103": 6.08, + "Rollei Compactline 110": 5.75, + "Rollei Compactline 130": 5.75, + "Rollei Compactline 150": 6.08, + "Rollei Compactline 200": 6.16, + "Rollei Compactline 202": 6.08, + "Rollei Compactline 203": 6.08, + "Rollei Compactline 230": 6.08, + "Rollei Compactline 302": 6.08, + "Rollei Compactline 304": 6.08, + "Rollei Compactline 312": 6.16, + "Rollei Compactline 320": 6.08, + "Rollei Compactline 350": 6.16, + "Rollei Compactline 360 TS": 6.16, + "Rollei Compactline 370 TS": 6.16, + "Rollei Compactline 390 SE": 6.16, + "Rollei Compactline 412": 6.08, + "Rollei Compactline 415": 6.08, + "Rollei Compactline 425": 6.16, + "Rollei Compactline 50": 6.16, + "Rollei Compactline 52": 6.16, + "Rollei Compactline 55": 6.16, + "Rollei Compactline 80": 5.75, + "Rollei Compactline 81": 5.75, + "Rollei Compactline 90": 6.16, + "Rollei Flexline 100": 5.75, + "Rollei Flexline 100 iT": 5.75, + "Rollei Flexline 140": 6.08, + "Rollei Flexline 200": 6.08, + "Rollei Flexline 202": 6.16, + "Rollei Flexline 250": 6.08, + "Rollei Kids 100": 8.8, + "Rollei Powerflex 240 HD": 6.16, + "Rollei Powerflex 360 Full HD": 6.16, + "Rollei Powerflex 3D": 6.08, + "Rollei Powerflex 400": 6.08, + "Rollei Powerflex 440": 6.08, + "Rollei Powerflex 450": 6.08, + "Rollei Powerflex 455": 6.08, + "Rollei Powerflex 460": 6.08, + "Rollei Powerflex 470": 6.08, + "Rollei Powerflex 500": 6.08, + "Rollei Powerflex 600": 6.16, + "Rollei Powerflex 610 HD": 6.16, + "Rollei Powerflex 700 Full HD": 6.08, + "Rollei Powerflex 800": 6.08, + "Rollei Powerflex 820": 6.08, + "Rollei Prego da3": 5.33, + "Rollei Prego da4": 5.75, + "Rollei Prego da5": 5.75, + "Rollei Prego da6": 7.11, + "Rollei Prego dp4200": 5.75, + "Rollei Prego dp5200": 5.75, + "Rollei Prego dp5300": 7.11, + "Rollei Prego dp5500": 5.75, + "Rollei Prego dp6000": 7.11, + "Rollei Prego dp6200": 7.11, + "Rollei Prego dp6300": 7.11, + "Rollei Prego dp8300": 7.11, + "Rollei RCP-10325X": 7.11, + "Rollei RCP-5324": 5.75, + "Rollei RCP-6324": 5.75, + "Rollei RCP-7324": 5.75, + "Rollei RCP-7325XS": 5.75, + "Rollei RCP-7330X": 5.75, + "Rollei RCP-7430XW": 5.75, + "Rollei RCP-8325": 5.75, + "Rollei RCP-8325X": 7.11, + "Rollei RCP-8325XS": 5.75, + "Rollei RCP-8330X": 5.75, + "Rollei RCP-8427XW": 5.75, + "Rollei RCP-8527X": 5.75, + "Rollei RCP-S10": 6.16, + "Rollei RCP-S8": 5.75, + "Rollei Sportsline 50": 6.16, + "Rollei Sportsline 60 Camouflage": 6.16, + "Rollei Sportsline 62": 6.16, + "Rollei Sportsline 90": 6.16, + "Rollei Sportsline 99": 6.16, + "Rollei X-8": 5.75, + "Rollei X-8 Compact": 5.75, + "Rollei X-8 Sports": 5.75, + "Rollei XS-10": 5.75, + "Rollei XS-10 inTouch": 6.16, + "Rollei XS-8": 5.75, + "Rollei XS-8 Crystal": 5.75, + "Rollei d20 motion": 7.11, + "Rollei d210 motion": 4.5, + "Rollei d23 com": 7.53, + "Rollei d33 com": 7.11, + "Rollei d330 motion": 5.33, + "Rollei d41 com": 7.11, + "Rollei d530 flex": 8.8, + "Rollei da10": 5.75, + "Rollei da1325 Prego": 5.75, + "Rollei da5324": 5.75, + "Rollei da5325 Prego": 5.75, + "Rollei da6324": 5.75, + "Rollei da7325 Prego": 5.75, + "Rollei da8324": 5.75, + "Rollei dc 3100": 5.33, + "Rollei dcx 310": 7.11, + "Rollei dcx 400": 7.11, + "Rollei dk 3000": 5.33, + "Rollei dk4010": 5.33, + "Rollei dp 300": 5.33, + 
"Rollei dp 3210": 5.33, + "Rollei dp6500": 7.11, + "Rollei dpx 310": 5.33, + "Rollei dr 5": 5.75, + "Rollei dr 5100": 7.11, + "Rollei ds6": 5.75, + "Rollei dsx 410": 5.75, + "Rollei dt 3200": 5.75, + "Rollei dt 4000": 5.75, + "Rollei dt 4200": 5.75, + "Rollei dt6 Tribute": 5.75, + "Rollei dx63": 5.75, + "Samsung AQ100": 6.08, + "Samsung CL5": 5.75, + "Samsung CL65": 6.08, + "Samsung CL80": 6.16, + "Samsung D75": 5.75, + "Samsung D830": 7.11, + "Samsung D85": 5.75, + "Samsung D860": 5.75, + "Samsung DV100": 6.16, + "Samsung DV150F": 6.16, + "Samsung DV300F": 6.16, + "Samsung Digimax 101": 6.4, + "Samsung Digimax 130": 4.5, + "Samsung Digimax 200": 5.33, + "Samsung Digimax 201": 4.5, + "Samsung Digimax 202": 6.4, + "Samsung Digimax 210 SE": 5.33, + "Samsung Digimax 220 SE": 5.33, + "Samsung Digimax 230": 5.33, + "Samsung Digimax 240": 4.5, + "Samsung Digimax 250": 4.5, + "Samsung Digimax 301": 5.33, + "Samsung Digimax 330": 7.53, + "Samsung Digimax 340": 7.11, + "Samsung Digimax 35 MP3": 4.8, + "Samsung Digimax 350SE": 7.11, + "Samsung Digimax 360": 7.11, + "Samsung Digimax 370": 5.75, + "Samsung Digimax 401": 5.75, + "Samsung Digimax 410": 7.11, + "Samsung Digimax 420": 7.11, + "Samsung Digimax 430": 5.75, + "Samsung Digimax 50 duo": 4.8, + "Samsung Digimax 530": 7.11, + "Samsung Digimax A400": 5.75, + "Samsung Digimax A402": 5.75, + "Samsung Digimax A5": 7.11, + "Samsung Digimax A50": 5.75, + "Samsung Digimax A502": 5.75, + "Samsung Digimax A503": 5.75, + "Samsung Digimax A55W": 5.75, + "Samsung Digimax A6": 7.11, + "Samsung Digimax A7": 7.11, + "Samsung Digimax D103": 7.11, + "Samsung Digimax L50": 5.75, + "Samsung Digimax L55W": 5.75, + "Samsung Digimax L60": 5.75, + "Samsung Digimax L70": 5.75, + "Samsung Digimax L85": 7.11, + "Samsung Digimax S1000": 7.11, + "Samsung Digimax S500": 5.75, + "Samsung Digimax S600": 5.75, + "Samsung Digimax S700": 5.75, + "Samsung Digimax S800": 7.11, + "Samsung Digimax U-CA 3": 5.33, + "Samsung Digimax U-CA 4": 5.75, + "Samsung Digimax U-CA 401": 5.75, + "Samsung Digimax U-CA5": 5.75, + "Samsung Digimax U-CA501": 5.75, + "Samsung Digimax U-CA505": 5.75, + "Samsung Digimax V3": 7.11, + "Samsung Digimax V4": 7.11, + "Samsung Digimax V40": 7.11, + "Samsung Digimax V4000": 7.11, + "Samsung Digimax V5": 7.11, + "Samsung Digimax V50": 7.11, + "Samsung Digimax V6": 6.74, + "Samsung Digimax V600": 7.11, + "Samsung Digimax V70": 7.11, + "Samsung Digimax V700": 7.11, + "Samsung Digimax V800": 7.11, + "Samsung Digimax i5": 5.75, + "Samsung Digimax i50 MP3": 5.75, + "Samsung Digimax i6": 5.75, + "Samsung ES10": 5.75, + "Samsung ES15": 6.08, + "Samsung ES17": 6.08, + "Samsung ES20": 6.08, + "Samsung ES25": 6.08, + "Samsung ES28": 6.08, + "Samsung ES30": 6.16, + "Samsung ES50": 6.08, + "Samsung ES55": 6.08, + "Samsung ES60": 6.08, + "Samsung ES65": 6.08, + "Samsung ES70": 6.08, + "Samsung ES73": 6.08, + "Samsung ES75": 6.08, + "Samsung ES80": 6.16, + "Samsung ES90": 6.16, + "Samsung ES95": 6.16, + "Samsung EX1": 7.53, + "Samsung EX2F": 7.53, + "Samsung GX-10": 23.5, + "Samsung GX-1L": 23.5, + "Samsung GX-1S": 23.5, + "Samsung GX-20": 23.4, + "Samsung Galaxy Camera": 6.16, + "Samsung Galaxy Camera 2": 6.16, + "Samsung Galaxy NX": 23.5, + "Samsung Galaxy S2": 4.54, + "Samsung Galaxy S3": 4.54, + "Samsung Galaxy S4": 4.69, + "Samsung Galaxy S4 Zoom": 6.19, + "Samsung HZ10W": 6.08, + "Samsung HZ15W": 6.08, + "Samsung HZ25W": 6.08, + "Samsung HZ30W": 6.16, + "Samsung HZ35W": 6.16, + "Samsung HZ50W": 6.16, + "Samsung IT100": 6.08, + "Samsung L100": 5.75, + "Samsung L110": 
5.75, + "Samsung L200": 5.75, + "Samsung L201": 6.08, + "Samsung L210": 6.08, + "Samsung L301": 6.08, + "Samsung L310W": 7.44, + "Samsung L700": 5.75, + "Samsung L73": 5.75, + "Samsung L730": 5.75, + "Samsung L74": 5.75, + "Samsung L74 Wide": 5.75, + "Samsung L77": 5.75, + "Samsung L80": 7.11, + "Samsung L830": 5.75, + "Samsung L83T": 5.75, + "Samsung M100": 5.75, + "Samsung MV800": 6.16, + "Samsung Miniket VP-MS10": 5.75, + "Samsung Miniket VP-MS11": 5.75, + "Samsung Miniket VP-MS15": 5.75, + "Samsung NV10": 7.11, + "Samsung NV100 HD": 7.44, + "Samsung NV11": 7.11, + "Samsung NV15": 7.27, + "Samsung NV20": 7.44, + "Samsung NV24HD": 6.16, + "Samsung NV3": 5.75, + "Samsung NV30": 5.75, + "Samsung NV4": 5.75, + "Samsung NV40": 6.08, + "Samsung NV7 OPS": 5.75, + "Samsung NV8": 7.27, + "Samsung NV9": 6.16, + "Samsung NX mini": 13.2, + "Samsung NX1": 23.5, + "Samsung NX10": 23.4, + "Samsung NX100": 23.4, + "Samsung NX1000": 23.5, + "Samsung NX11": 23.4, + "Samsung NX1100": 23.5, + "Samsung NX20": 23.5, + "Samsung NX200": 23.5, + "Samsung NX2000": 23.5, + "Samsung NX210": 23.5, + "Samsung NX30": 23.5, + "Samsung NX300": 23.5, + "Samsung NX3000": 23.5, + "Samsung NX5": 23.4, + "Samsung PL10": 5.75, + "Samsung PL100": 6.08, + "Samsung PL120": 6.16, + "Samsung PL150": 6.08, + "Samsung PL160": 6.08, + "Samsung PL170": 6.08, + "Samsung PL20": 6.16, + "Samsung PL200": 6.16, + "Samsung PL210": 6.16, + "Samsung PL50": 6.08, + "Samsung PL51": 6.16, + "Samsung PL55": 6.08, + "Samsung PL60": 6.08, + "Samsung PL65": 6.08, + "Samsung PL70": 6.08, + "Samsung PL80": 6.08, + "Samsung PL90": 6.08, + "Samsung Pro815": 8.8, + "Samsung S1030": 7.11, + "Samsung S1050": 7.11, + "Samsung S1060": 6.08, + "Samsung S1070": 6.08, + "Samsung S630": 5.75, + "Samsung S730": 5.75, + "Samsung S750": 5.75, + "Samsung S760": 5.75, + "Samsung S830": 7.11, + "Samsung S85": 5.75, + "Samsung S850": 7.11, + "Samsung S860": 5.75, + "Samsung SDC-MS61": 5.75, + "Samsung SH100": 6.08, + "Samsung SL102": 6.08, + "Samsung SL201": 6.08, + "Samsung SL202": 6.08, + "Samsung SL30": 6.08, + "Samsung SL310W": 7.44, + "Samsung SL50": 6.08, + "Samsung SL502": 6.08, + "Samsung SL600": 6.08, + "Samsung SL605": 6.08, + "Samsung SL620": 6.08, + "Samsung SL630": 6.08, + "Samsung SL720": 6.08, + "Samsung SL820": 6.08, + "Samsung ST10": 5.75, + "Samsung ST100": 6.16, + "Samsung ST1000": 6.16, + "Samsung ST150F": 6.16, + "Samsung ST200F": 6.16, + "Samsung ST30": 4.8, + "Samsung ST45": 6.08, + "Samsung ST50": 6.08, + "Samsung ST500": 6.16, + "Samsung ST5000": 6.16, + "Samsung ST550": 6.08, + "Samsung ST5500": 6.16, + "Samsung ST60": 6.16, + "Samsung ST600": 6.08, + "Samsung ST65": 6.16, + "Samsung ST6500": 6.08, + "Samsung ST66": 6.16, + "Samsung ST70": 6.16, + "Samsung ST700": 6.16, + "Samsung ST72": 6.16, + "Samsung ST76": 6.16, + "Samsung ST77": 6.16, + "Samsung ST80": 6.08, + "Samsung ST88": 6.16, + "Samsung ST90": 6.16, + "Samsung ST93": 6.16, + "Samsung ST95": 6.16, + "Samsung ST96": 6.16, + "Samsung TL100": 6.08, + "Samsung TL105": 6.16, + "Samsung TL110": 6.16, + "Samsung TL205": 6.08, + "Samsung TL210": 6.08, + "Samsung TL220": 6.08, + "Samsung TL225": 6.08, + "Samsung TL240": 6.16, + "Samsung TL320": 6.08, + "Samsung TL34HD": 7.44, + "Samsung TL350": 6.08, + "Samsung TL500": 7.53, + "Samsung TL9": 6.16, + "Samsung WB100": 6.16, + "Samsung WB1000": 6.08, + "Samsung WB110": 6.16, + "Samsung WB1100F": 6.16, + "Samsung WB150F": 6.16, + "Samsung WB2000": 6.08, + "Samsung WB200F": 6.16, + "Samsung WB210": 6.16, + "Samsung WB2100": 6.16, + "Samsung 
WB2200F": 6.16, + "Samsung WB250F": 6.16, + "Samsung WB30F": 6.16, + "Samsung WB350F": 6.16, + "Samsung WB35F": 6.16, + "Samsung WB500": 6.08, + "Samsung WB5000": 6.08, + "Samsung WB50F": 6.16, + "Samsung WB510": 6.08, + "Samsung WB550": 6.08, + "Samsung WB5500": 6.16, + "Samsung WB560": 6.08, + "Samsung WB600": 6.16, + "Samsung WB650": 6.16, + "Samsung WB660": 6.16, + "Samsung WB690": 6.08, + "Samsung WB700": 6.08, + "Samsung WB750": 6.08, + "Samsung WB800F": 6.16, + "Samsung WB850F": 6.16, + "Samsung WP10": 6.08, + "Samsung i100": 6.08, + "Samsung i7": 5.75, + "Samsung i70": 5.75, + "Samsung i8": 5.75, + "Samsung i80": 5.75, + "Samsung i85": 5.75, + "Sanyo DSC S1": 5.33, + "Sanyo DSC S3": 5.33, + "Sanyo DSC S4": 5.75, + "Sanyo DSC S5": 5.75, + "Sanyo VPC A5": 5.75, + "Sanyo VPC AZ1": 7.11, + "Sanyo VPC AZ3 EX": 7.11, + "Sanyo VPC E1090": 6.16, + "Sanyo VPC E1403": 6.16, + "Sanyo VPC E1500TP": 6.08, + "Sanyo VPC E890": 5.75, + "Sanyo VPC HD1 EX": 5.75, + "Sanyo VPC J1 EX": 5.33, + "Sanyo VPC J2 EX": 5.33, + "Sanyo VPC J4 EX": 5.33, + "Sanyo VPC MZ1": 7.11, + "Sanyo VPC MZ2": 7.11, + "Sanyo VPC S1085": 6.08, + "Sanyo VPC S122": 6.16, + "Sanyo VPC S1275": 6.16, + "Sanyo VPC S1414": 6.08, + "Sanyo VPC S885": 5.75, + "Sanyo VPC T1495": 6.08, + "Sanyo VPC X1200": 6.16, + "Sanyo VPC X1220": 6.16, + "Sanyo VPC X1420": 6.16, + "Sanyo Xacti C1": 5.33, + "Sanyo Xacti C4": 5.33, + "Sanyo Xacti C40": 5.33, + "Sanyo Xacti C5": 5.75, + "Sanyo Xacti C6": 5.75, + "Sanyo Xacti DMX-CA65": 5.75, + "Sanyo Xacti DMX-CA8": 5.75, + "Sanyo Xacti DMX-CG65": 5.75, + "Sanyo Xacti DMX-CG9": 6.08, + "Sanyo Xacti DMX-HD700": 5.75, + "Sanyo Xacti DMX-HD800": 5.75, + "Sanyo Xacti E6": 5.75, + "Sanyo Xacti E60": 5.75, + "Sanyo Xacti S50": 5.75, + "Sanyo Xacti S6": 5.75, + "Sanyo Xacti S60": 5.75, + "Sanyo Xacti S70": 5.75, + "Sanyo Xacti VPC S1 EX": 5.33, + "Sanyo Xacti VPC S3 EX": 5.33, + "Sanyo Xacti VPC S4 EX": 5.75, + "Sanyo Xacti VPC-503": 5.75, + "Sanyo Xacti VPC-603": 5.75, + "Sanyo Xacti VPC-CA6": 5.75, + "Sanyo Xacti VPC-CA9": 5.75, + "Sanyo Xacti VPC-CG10": 6.08, + "Sanyo Xacti VPC-CG6": 5.75, + "Sanyo Xacti VPC-E10": 6.16, + "Sanyo Xacti VPC-E1075": 6.08, + "Sanyo Xacti VPC-E7": 5.75, + "Sanyo Xacti VPC-E760": 5.75, + "Sanyo Xacti VPC-E860": 5.75, + "Sanyo Xacti VPC-E870": 5.75, + "Sanyo Xacti VPC-E875": 5.75, + "Sanyo Xacti VPC-HD1A": 5.75, + "Sanyo Xacti VPC-HD2": 5.75, + "Sanyo Xacti VPC-HD2000": 5.75, + "Sanyo Xacti VPC-S1070": 6.08, + "Sanyo Xacti VPC-S500": 5.75, + "Sanyo Xacti VPC-S600": 5.75, + "Sanyo Xacti VPC-S650": 5.75, + "Sanyo Xacti VPC-S670": 5.75, + "Sanyo Xacti VPC-S7": 5.75, + "Sanyo Xacti VPC-S750": 5.75, + "Sanyo Xacti VPC-S760": 5.75, + "Sanyo Xacti VPC-S770": 5.75, + "Sanyo Xacti VPC-S870": 5.75, + "Sanyo Xacti VPC-S880": 5.75, + "Sanyo Xacti VPC-T1060": 6.08, + "Sanyo Xacti VPC-T700": 5.75, + "Sanyo Xacti VPC-T850": 5.75, + "Sanyo Xacti VPC-W800": 5.75, + "Sanyo Xacti VPC-X1200": 6.16, + "Sigma DP1": 20.7, + "Sigma DP1 Merrill": 24.0, + "Sigma DP1s": 20.7, + "Sigma DP1x": 20.7, + "Sigma DP2": 20.7, + "Sigma DP2 Merrill": 24.0, + "Sigma DP2s": 20.7, + "Sigma DP2x": 20.7, + "Sigma DP3 Merrill": 24.0, + "Sigma SD1": 24.0, + "Sigma SD1 Merrill": 24.0, + "Sigma SD10": 20.7, + "Sigma SD14": 20.7, + "Sigma SD15": 20.7, + "Sigma SD9": 20.7, + "Skydio 2": 5.476, + "Sony A77 II": 23.5, + "Sony Alpha 7": 35.8, + "Sony Alpha 7R": 35.9, + "Sony Alpha 7S": 35.6, + "Sony Alpha DSLR-A100": 23.6, + "Sony Alpha DSLR-A200": 23.6, + "Sony Alpha DSLR-A230": 23.5, + "Sony Alpha DSLR-A290": 23.5, + "Sony Alpha 
DSLR-A300": 23.6, + "Sony Alpha DSLR-A330": 23.5, + "Sony Alpha DSLR-A350": 23.6, + "Sony Alpha DSLR-A380": 23.6, + "Sony Alpha DSLR-A390": 23.5, + "Sony Alpha DSLR-A450": 23.4, + "Sony Alpha DSLR-A500": 23.5, + "Sony Alpha DSLR-A550": 23.4, + "Sony Alpha DSLR-A560": 23.5, + "Sony Alpha DSLR-A580": 23.5, + "Sony Alpha DSLR-A700": 23.5, + "Sony Alpha DSLR-A850": 35.9, + "Sony Alpha DSLR-A900": 35.9, + "Sony Alpha NEX-3": 23.4, + "Sony Alpha NEX-3N": 23.5, + "Sony Alpha NEX-5": 23.4, + "Sony Alpha NEX-5N": 23.4, + "Sony Alpha NEX-5R": 23.4, + "Sony Alpha NEX-5T": 23.4, + "Sony Alpha NEX-6": 23.5, + "Sony Alpha NEX-7": 23.5, + "Sony Alpha NEX-C3": 23.4, + "Sony Alpha NEX-F3": 23.4, + "Sony Alpha a3000": 23.5, + "Sony Alpha a5000": 23.5, + "Sony Alpha a5100": 23.5, + "Sony Alpha a6000": 23.5, + "Sony C6903": 6.17, + "Sony Cyber-shot DSC-RX1": 35.8, + "Sony CyberShot DSC HX100V": 6.16, + "Sony CyberShot DSC HX7V": 6.16, + "Sony CyberShot DSC HX9V": 6.16, + "Sony Cybershot DSC D700": 6.4, + "Sony Cybershot DSC D770": 6.4, + "Sony Cybershot DSC F505": 6.4, + "Sony Cybershot DSC F505v": 7.11, + "Sony Cybershot DSC F55": 6.4, + "Sony Cybershot DSC F55v": 7.11, + "Sony Cybershot DSC F707": 8.8, + "Sony Cybershot DSC F717": 8.8, + "Sony Cybershot DSC F77": 7.11, + "Sony Cybershot DSC F828": 8.8, + "Sony Cybershot DSC F88": 5.9, + "Sony Cybershot DSC FX77": 7.11, + "Sony Cybershot DSC G1": 5.75, + "Sony Cybershot DSC G3": 6.16, + "Sony Cybershot DSC H1": 5.75, + "Sony Cybershot DSC H10": 5.75, + "Sony Cybershot DSC H100": 6.16, + "Sony Cybershot DSC H2": 5.75, + "Sony Cybershot DSC H20": 6.16, + "Sony Cybershot DSC H200": 6.16, + "Sony Cybershot DSC H3": 5.75, + "Sony Cybershot DSC H300": 6.16, + "Sony Cybershot DSC H400": 6.16, + "Sony Cybershot DSC H5": 5.75, + "Sony Cybershot DSC H50": 6.16, + "Sony Cybershot DSC H55": 6.16, + "Sony Cybershot DSC H7": 5.75, + "Sony Cybershot DSC H70": 6.16, + "Sony Cybershot DSC H9": 5.75, + "Sony Cybershot DSC H90": 6.16, + "Sony Cybershot DSC HX1": 5.9, + "Sony Cybershot DSC HX10V": 6.16, + "Sony Cybershot DSC HX200V": 6.16, + "Sony Cybershot DSC HX20V": 6.16, + "Sony Cybershot DSC HX30V": 6.16, + "Sony Cybershot DSC HX5": 5.9, + "Sony Cybershot DSC HX50": 6.16, + "Sony Cybershot DSC HX60": 6.16, + "Sony Cybershot DSC J10": 6.16, + "Sony Cybershot DSC L1": 5.33, + "Sony Cybershot DSC M1": 5.9, + "Sony Cybershot DSC M2": 5.75, + "Sony Cybershot DSC N1": 7.11, + "Sony Cybershot DSC N2": 7.53, + "Sony Cybershot DSC P1": 7.11, + "Sony Cybershot DSC P10": 7.11, + "Sony Cybershot DSC P100": 7.11, + "Sony Cybershot DSC P12": 7.11, + "Sony Cybershot DSC P120": 7.11, + "Sony Cybershot DSC P150": 7.11, + "Sony Cybershot DSC P2": 5.33, + "Sony Cybershot DSC P20": 5.33, + "Sony Cybershot DSC P200": 7.11, + "Sony Cybershot DSC P3": 7.11, + "Sony Cybershot DSC P30": 5.33, + "Sony Cybershot DSC P31": 5.33, + "Sony Cybershot DSC P32": 5.33, + "Sony Cybershot DSC P41": 5.33, + "Sony Cybershot DSC P43": 5.33, + "Sony Cybershot DSC P5": 7.11, + "Sony Cybershot DSC P50": 5.33, + "Sony Cybershot DSC P51": 5.33, + "Sony Cybershot DSC P52": 5.33, + "Sony Cybershot DSC P7": 7.11, + "Sony Cybershot DSC P71": 7.11, + "Sony Cybershot DSC P72": 7.11, + "Sony Cybershot DSC P73": 5.33, + "Sony Cybershot DSC P8": 5.33, + "Sony Cybershot DSC P9": 7.11, + "Sony Cybershot DSC P92": 7.11, + "Sony Cybershot DSC P93": 7.11, + "Sony Cybershot DSC R1": 21.5, + "Sony Cybershot DSC RX100": 13.2, + "Sony Cybershot DSC RX100 II": 13.2, + "Sony Cybershot DSC RX1R": 35.8, + "Sony Cybershot DSC S2000": 6.16, 
+ "Sony Cybershot DSC S2100": 6.16, + "Sony Cybershot DSC S30": 5.33, + "Sony Cybershot DSC S3000": 4.96, + "Sony Cybershot DSC S40": 5.33, + "Sony Cybershot DSC S45": 5.75, + "Sony Cybershot DSC S50": 5.33, + "Sony Cybershot DSC S5000": 6.16, + "Sony Cybershot DSC S60": 5.33, + "Sony Cybershot DSC S600": 5.75, + "Sony Cybershot DSC S650": 5.75, + "Sony Cybershot DSC S70": 7.11, + "Sony Cybershot DSC S700": 5.75, + "Sony Cybershot DSC S730": 5.75, + "Sony Cybershot DSC S75": 7.11, + "Sony Cybershot DSC S750": 5.75, + "Sony Cybershot DSC S780": 5.75, + "Sony Cybershot DSC S80": 5.33, + "Sony Cybershot DSC S800": 7.11, + "Sony Cybershot DSC S85": 7.11, + "Sony Cybershot DSC S90": 5.33, + "Sony Cybershot DSC S930": 6.08, + "Sony Cybershot DSC S950": 6.16, + "Sony Cybershot DSC S980": 6.08, + "Sony Cybershot DSC T1": 5.9, + "Sony Cybershot DSC T10": 5.75, + "Sony Cybershot DSC T100": 5.75, + "Sony Cybershot DSC T11": 5.9, + "Sony Cybershot DSC T110": 6.16, + "Sony Cybershot DSC T2": 5.75, + "Sony Cybershot DSC T20": 5.75, + "Sony Cybershot DSC T200": 5.75, + "Sony Cybershot DSC T3": 5.9, + "Sony Cybershot DSC T30": 5.75, + "Sony Cybershot DSC T300": 6.16, + "Sony Cybershot DSC T33": 5.9, + "Sony Cybershot DSC T5": 5.75, + "Sony Cybershot DSC T50": 5.75, + "Sony Cybershot DSC T500": 6.16, + "Sony Cybershot DSC T7": 5.9, + "Sony Cybershot DSC T70": 5.75, + "Sony Cybershot DSC T700": 6.16, + "Sony Cybershot DSC T77": 6.16, + "Sony Cybershot DSC T9": 5.75, + "Sony Cybershot DSC T90": 6.16, + "Sony Cybershot DSC T900": 6.16, + "Sony Cybershot DSC T99": 6.16, + "Sony Cybershot DSC TX1": 5.9, + "Sony Cybershot DSC TX10": 6.16, + "Sony Cybershot DSC TX100V": 6.16, + "Sony Cybershot DSC TX20": 6.16, + "Sony Cybershot DSC TX200V": 6.16, + "Sony Cybershot DSC TX5": 5.9, + "Sony Cybershot DSC TX55": 6.16, + "Sony Cybershot DSC TX66": 6.16, + "Sony Cybershot DSC TX7": 5.9, + "Sony Cybershot DSC TX9": 6.16, + "Sony Cybershot DSC U10": 5.33, + "Sony Cybershot DSC U20": 5.33, + "Sony Cybershot DSC U30": 5.33, + "Sony Cybershot DSC U40": 5.33, + "Sony Cybershot DSC U50": 5.33, + "Sony Cybershot DSC U60": 5.33, + "Sony Cybershot DSC V1": 7.11, + "Sony Cybershot DSC V3": 7.11, + "Sony Cybershot DSC W1": 7.11, + "Sony Cybershot DSC W100": 7.11, + "Sony Cybershot DSC W110": 5.75, + "Sony Cybershot DSC W115": 5.75, + "Sony Cybershot DSC W12": 7.11, + "Sony Cybershot DSC W120": 5.75, + "Sony Cybershot DSC W125": 5.75, + "Sony Cybershot DSC W130": 5.75, + "Sony Cybershot DSC W150": 5.75, + "Sony Cybershot DSC W17": 7.11, + "Sony Cybershot DSC W170": 6.16, + "Sony Cybershot DSC W180": 6.08, + "Sony Cybershot DSC W190": 6.08, + "Sony Cybershot DSC W200": 7.53, + "Sony Cybershot DSC W210": 6.16, + "Sony Cybershot DSC W215": 6.16, + "Sony Cybershot DSC W220": 6.16, + "Sony Cybershot DSC W230": 6.16, + "Sony Cybershot DSC W270": 7.53, + "Sony Cybershot DSC W275": 6.16, + "Sony Cybershot DSC W290": 6.16, + "Sony Cybershot DSC W30": 5.75, + "Sony Cybershot DSC W300": 7.53, + "Sony Cybershot DSC W310": 6.16, + "Sony Cybershot DSC W320": 6.16, + "Sony Cybershot DSC W330": 6.16, + "Sony Cybershot DSC W35": 5.75, + "Sony Cybershot DSC W350": 6.16, + "Sony Cybershot DSC W360": 6.16, + "Sony Cybershot DSC W370": 6.16, + "Sony Cybershot DSC W380": 6.16, + "Sony Cybershot DSC W40": 5.75, + "Sony Cybershot DSC W5": 7.11, + "Sony Cybershot DSC W50": 5.75, + "Sony Cybershot DSC W510": 6.16, + "Sony Cybershot DSC W520": 6.16, + "Sony Cybershot DSC W530": 6.16, + "Sony Cybershot DSC W55": 5.75, + "Sony Cybershot DSC W550": 6.16, + "Sony 
Cybershot DSC W560": 6.16, + "Sony Cybershot DSC W570": 6.16, + "Sony Cybershot DSC W580": 6.16, + "Sony Cybershot DSC W610": 6.16, + "Sony Cybershot DSC W620": 6.16, + "Sony Cybershot DSC W630": 6.16, + "Sony Cybershot DSC W650": 6.16, + "Sony Cybershot DSC W670": 6.16, + "Sony Cybershot DSC W690": 6.16, + "Sony Cybershot DSC W7": 7.11, + "Sony Cybershot DSC W70": 5.75, + "Sony Cybershot DSC W710": 6.16, + "Sony Cybershot DSC W730": 6.16, + "Sony Cybershot DSC W80": 5.75, + "Sony Cybershot DSC W800": 6.16, + "Sony Cybershot DSC W810": 6.16, + "Sony Cybershot DSC W830": 6.16, + "Sony Cybershot DSC W85": 5.75, + "Sony Cybershot DSC W90": 5.75, + "Sony Cybershot DSC WX1": 5.9, + "Sony Cybershot DSC WX10": 6.16, + "Sony Cybershot DSC WX100": 6.16, + "Sony Cybershot DSC WX150": 6.16, + "Sony Cybershot DSC WX220": 6.16, + "Sony Cybershot DSC WX30": 6.16, + "Sony Cybershot DSC WX350": 6.16, + "Sony Cybershot DSC WX5": 6.16, + "Sony Cybershot DSC WX50": 6.16, + "Sony Cybershot DSC WX7": 6.16, + "Sony Cybershot DSC WX70": 6.16, + "Sony Cybershot DSC WX9": 6.16, + "Sony Cybershot DSC-HX300": 6.16, + "Sony Cybershot DSC-HX400": 6.16, + "Sony Cybershot DSC-QX10": 6.16, + "Sony Cybershot DSC-QX100": 13.2, + "Sony Cybershot DSC-RX10": 13.2, + "Sony Cybershot DSC-RX100 III": 13.2, + "Sony Cybershot DSC-TF1": 6.16, + "Sony Cybershot DSC-TX30": 6.16, + "Sony Cybershot DSC-WX200": 6.16, + "Sony Cybershot DSC-WX300": 6.16, + "Sony Cybershot DSC-WX60": 6.16, + "Sony Cybershot DSC-WX80": 6.16, + "Sony D5503": 6.17, + "Sony Mavica CD1000": 5.33, + "Sony Mavica CD200": 5.33, + "Sony Mavica CD250": 5.33, + "Sony Mavica CD300": 7.11, + "Sony Mavica CD350": 5.33, + "Sony Mavica CD400": 7.11, + "Sony Mavica CD500": 7.11, + "Sony Mavica FD-100": 5.33, + "Sony Mavica FD-200": 5.33, + "Sony Mavica FD-71": 6.4, + "Sony Mavica FD-73": 6.4, + "Sony Mavica FD-75": 7.11, + "Sony Mavica FD-81": 4.8, + "Sony Mavica FD-83": 4.8, + "Sony Mavica FD-85": 5.33, + "Sony Mavica FD-87": 5.33, + "Sony Mavica FD-88": 4.8, + "Sony Mavica FD-90": 5.33, + "Sony Mavica FD-91": 4.8, + "Sony Mavica FD-92": 5.33, + "Sony Mavica FD-95": 5.33, + "Sony Mavica FD-97": 5.33, + "Sony QX1": 23.5, + "Sony QX30": 6.16, + "Sony SLT-A33": 23.5, + "Sony SLT-A35": 23.5, + "Sony SLT-A37": 23.5, + "Sony SLT-A55": 23.5, + "Sony SLT-A57": 23.5, + "Sony SLT-A58": 23.2, + "Sony SLT-A65": 23.5, + "Sony SLT-A77": 23.5, + "Sony SLT-A99": 35.8, + "Teracube Teracube One": 4.71, + "Toshiba PDR 2300": 5.33, + "Toshiba PDR 3300": 7.11, + "Toshiba PDR 3310": 7.11, + "Toshiba PDR 3320": 7.11, + "Toshiba PDR 4300": 7.11, + "Toshiba PDR 5300": 7.11, + "Toshiba PDR M25": 5.33, + "Toshiba PDR M5": 6.4, + "Toshiba PDR M500": 5.33, + "Toshiba PDR M60": 6.4, + "Toshiba PDR M61": 6.4, + "Toshiba PDR M65": 6.4, + "Toshiba PDR M70": 7.11, + "Toshiba PDR M700": 5.33, + "Toshiba PDR M71": 7.11, + "Toshiba PDR M81": 7.11, + "Toshiba PDR T10": 5.33, + "Toshiba PDR T20": 5.33, + "Toshiba PDR T30": 5.33, + "Vivitar V8025": 7.11, + "Vivitar ViviCam 5105s": 5.75, + "Vivitar ViviCam 5150s": 5.75, + "Vivitar ViviCam 5160s": 5.75, + "Vivitar ViviCam 5195": 5.75, + "Vivitar ViviCam 5350s": 5.75, + "Vivitar ViviCam 5355": 5.75, + "Vivitar ViviCam 5385": 5.75, + "Vivitar ViviCam 5386": 5.75, + "Vivitar ViviCam 5388": 5.75, + "Vivitar ViviCam 6150s": 5.75, + "Vivitar ViviCam 6200w": 5.75, + "Vivitar ViviCam 6300": 5.75, + "Vivitar ViviCam 6320": 5.75, + "Vivitar ViviCam 6326": 5.75, + "Vivitar ViviCam 6330": 5.75, + "Vivitar ViviCam 6380u": 5.75, + "Vivitar ViviCam 6385u": 5.75, + "Vivitar 
ViviCam 6388s": 5.75, + "Vivitar ViviCam 7100s": 5.75, + "Vivitar ViviCam 7310": 5.75, + "Vivitar ViviCam 7388s": 5.75, + "Vivitar ViviCam 7500i": 5.75, + "Vivitar ViviCam 8300s": 7.11, + "Vivitar ViviCam 8400": 7.11, + "Vivitar ViviCam 8600": 7.11, + "Vivitar ViviCam 8600s": 7.11, + "Vivitar ViviCam 8625": 7.11, + "Vivitar ViviCam X30": 7.11, + "Vivitar ViviCam X60": 7.11, + "Yakumo CamMaster SD 432": 5.33, + "Yakumo CamMaster SD 482": 5.75, + "Yakumo Mega Image 34": 5.33, + "Yakumo Mega Image 35": 7.11, + "Yakumo Mega Image 37": 5.75, + "Yakumo Mega Image 410": 5.75, + "Yakumo Mega Image 45": 7.11, + "Yakumo Mega Image 47": 5.75, + "Yakumo Mega Image 47 SL": 5.75, + "Yakumo Mega Image 47sx": 7.11, + "Yakumo Mega Image 55cx": 7.11, + "Yakumo Mega Image 57": 7.11, + "Yakumo Mega Image 57x": 7.11, + "Yakumo Mega Image 610x": 7.11, + "Yakumo Mega Image 67x": 7.11, + "Yakumo Mega Image 811x": 7.11, + "Yakumo Mega Image 84 D": 5.75, + "Yakumo Mega Image 85D": 5.75, + "Yakumo Mega Image II": 7.11, + "Yakumo Mega Image IV": 7.11, + "Yakumo Mega Image VI": 7.11, + "Yakumo Mega Image VII": 6.4, + "Yakumo Mega Image X": 7.11, + "Yakumo Mega Image XL": 4.23, + "Yakumo Mega Image XS": 6.4, + "YTXJ02FM": 6.17, + "Yuneec E90": 13.2, + "Google Nexus S": 3.9, + "HUAWEI HUAWEI P6-U06": 4.8, + "oneplu A000": 4.8, + "SAMSUNG GT-I9300": 4.53, + "SAMSUNG GT-I9195": 4.54, + "LG Electronics LG-D390n": 4.8, + "Sony Xperia Z1": 6.17, + "Apple iPhone7,2": 6.1, + "LG Electronics LG-D855": 4.8, + "Canon EOS DIGITAL REBEL XT": 22.2, + "Canon EOS DIGITAL REBEL XTi": 22.2, + "Canon EOS Kiss Digital": 22.66, + "Canon IXY DIGITAL 600": 7.176, + "DJI PHANTOM VISION FC200": 6.17, + "GoPro HD2 U": 5.8, + "GoPro HERO4 Black": 6.17, + "GoPro HERO4 Silver": 6.17, + "Gopro HD3": 5.76, + "KONICA MILOLTA DYNAX 5D": 23.5, + "Mantis i23": 45.0, + "Mobius Action Cam": 2.1, + "NIKON COOLPIX L3": 5.76, + "NIKON E2500": 5.27, + "NIKON E3100": 5.27, + "NIKON E3200": 5.27, + "NIKON E3700": 5.27, + "NIKON E4200": 7.176, + "NIKON E4300": 7.18, + "NIKON E4500": 7.176, + "NIKON E4600": 5.76, + "NIKON E5000": 8.8, + "NIKON E5200": 7.176, + "NIKON E5400": 7.176, + "NIKON E5600": 5.76, + "NIKON E5700": 8.8, + "NIKON E5900": 7.176, + "NIKON E7600": 7.176, + "NIKON E775": 5.27, + "NIKON E7900": 7.176, + "NIKON E8800": 8.8, + "NIKON E990": 7.176, + "NIKON E995": 7.176, + "Nokia N80": 5.27, + "Nokia N93": 4.536, + "Nokia N95": 5.7, + "OLYMPUS_IMAGING_CORP. X450,D535Z,C370Z": 5.27, + "PHANTOM VISION FC200": 6.17, + "SAMSUNG TECHWIN Pro 815": 8.8, + "SONY DSC-HX5V": 6.104, + "SONY DSC-N12": 7.176, + "SONY ILCE-5100": 23.5, + "SONY ILCE-7S": 35.8, + "SONY SLT-A77V": 23.5, + "GITUP GIT2": 6.216, + "DJI ZH20T": 7.68, + "DJI XT2": 10.88, + "DJI ZENMUSEP1": 35.9 +} diff --git a/utils/tools.py b/utils/tools.py new file mode 100644 index 0000000000000000000000000000000000000000..adbc3eae97c599d9ce606426ae5628484b9e2499 --- /dev/null +++ b/utils/tools.py @@ -0,0 +1,17 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. 
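For orientation, the sensor-width table that closes just above is the kind of lookup commonly used to turn an EXIF focal length in millimetres into a focal length in pixels via the pinhole relation f_px = f_mm * image_width_px / sensor_width_mm. The sketch below only illustrates that conversion; the file name, the dictionary SENSOR_WIDTHS_MM, and the helper focal_length_px are hypothetical and not part of this diff.

# Minimal sketch (assumed names, not part of this repository): use a
# sensor-width table like the one above to derive focal length in pixels.
import json

with open("sensor_widths.json") as fh:  # hypothetical path to the JSON table above
    SENSOR_WIDTHS_MM = json.load(fh)    # {"<Make Model>": sensor width in mm}

def focal_length_px(make_model: str, focal_mm: float, image_width_px: int) -> float:
    """Pinhole relation: f_px = f_mm * image_width_px / sensor_width_mm."""
    return focal_mm * image_width_px / SENSOR_WIDTHS_MM[make_model]

# Example: a 6.16 mm sensor, 4.3 mm lens, 4000 px wide image -> ~2792 px.
print(focal_length_px("Sony Cybershot DSC WX100", 4.3, 4000))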
+ +import time + + +class Timer: + def __init__(self, name=None): + self.name = name + + def __enter__(self): + self.tstart = time.time() + return self + + def __exit__(self, type, value, traceback): + self.duration = time.time() - self.tstart + if self.name is not None: + print("[%s] Elapsed: %s" % (self.name, self.duration)) diff --git a/utils/viz_2d.py b/utils/viz_2d.py new file mode 100644 index 0000000000000000000000000000000000000000..09123e81160196f905580e0432847fa97247e468 --- /dev/null +++ b/utils/viz_2d.py @@ -0,0 +1,195 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. + +# Adapted from Hierarchical-Localization, Paul-Edouard Sarlin, ETH Zurich +# https://github.com/cvg/Hierarchical-Localization/blob/master/hloc/utils/viz.py +# Released under the Apache License 2.0 + +import matplotlib +import matplotlib.patheffects as path_effects +import matplotlib.pyplot as plt +import numpy as np + + +def plot_images(imgs, titles=None, cmaps="gray", dpi=100, pad=0.5, adaptive=True): + """Plot a set of images horizontally. + Args: + imgs: a list of NumPy or PyTorch images, RGB (H, W, 3) or mono (H, W). + titles: a list of strings, as titles for each image. + cmaps: colormaps for monochrome images. + adaptive: whether the figure size should fit the image aspect ratios. + """ + n = len(imgs) + if not isinstance(cmaps, (list, tuple)): + cmaps = [cmaps] * n + + if adaptive: + ratios = [i.shape[1] / i.shape[0] for i in imgs] # W / H + else: + ratios = [4 / 3] * n + figsize = [sum(ratios) * 4.5, 4.5] + fig, ax = plt.subplots( + 1, n, figsize=figsize, dpi=dpi, gridspec_kw={"width_ratios": ratios} + ) + if n == 1: + ax = [ax] + for i in range(n): + ax[i].imshow(imgs[i], cmap=plt.get_cmap(cmaps[i])) + ax[i].get_yaxis().set_ticks([]) + ax[i].get_xaxis().set_ticks([]) + ax[i].set_axis_off() + for spine in ax[i].spines.values(): # remove frame + spine.set_visible(False) + if titles: + ax[i].set_title(titles[i]) + fig.tight_layout(pad=pad) + # fig.show() + + +def plot_keypoints(kpts, colors="lime", ps=4): + """Plot keypoints for existing images. + Args: + kpts: list of ndarrays of size (N, 2). + colors: string, or list of list of tuples (one for each keypoints). + ps: size of the keypoints as float. + """ + if not isinstance(colors, list): + colors = [colors] * len(kpts) + axes = plt.gcf().axes + for a, k, c in zip(axes, kpts, colors): + a.scatter(k[:, 0], k[:, 1], c=c, s=ps, linewidths=0) + + +def plot_matches(kpts0, kpts1, color=None, lw=1.5, ps=4, indices=(0, 1), a=1.0): + """Plot matches for a pair of existing images. + Args: + kpts0, kpts1: corresponding keypoints of size (N, 2). + color: color of each match, string or RGB tuple. Random if not given. + lw: width of the lines. + ps: size of the end points (no endpoint if ps=0) + indices: indices of the images to draw the matches on. + a: alpha opacity of the match lines. 
+ """ + fig = plt.gcf() + ax = fig.axes + assert len(ax) > max(indices) + ax0, ax1 = ax[indices[0]], ax[indices[1]] + fig.canvas.draw() + + assert len(kpts0) == len(kpts1) + if color is None: + color = matplotlib.cm.hsv(np.random.rand(len(kpts0))).tolist() + elif len(color) > 0 and not isinstance(color[0], (tuple, list)): + color = [color] * len(kpts0) + + if lw > 0: + # transform the points into the figure coordinate system + transFigure = fig.transFigure.inverted() + fkpts0 = transFigure.transform(ax0.transData.transform(kpts0)) + fkpts1 = transFigure.transform(ax1.transData.transform(kpts1)) + fig.lines += [ + matplotlib.lines.Line2D( + (fkpts0[i, 0], fkpts1[i, 0]), + (fkpts0[i, 1], fkpts1[i, 1]), + zorder=1, + transform=fig.transFigure, + c=color[i], + linewidth=lw, + alpha=a, + ) + for i in range(len(kpts0)) + ] + + # freeze the axes to prevent the transform to change + ax0.autoscale(enable=False) + ax1.autoscale(enable=False) + + if ps > 0: + ax0.scatter(kpts0[:, 0], kpts0[:, 1], c=color, s=ps) + ax1.scatter(kpts1[:, 0], kpts1[:, 1], c=color, s=ps) + + +def add_text( + idx, + text, + pos=(0.01, 0.99), + fs=15, + color="w", + lcolor="k", + lwidth=2, + ha="left", + va="top", + normalized=True, + zorder=3, +): + ax = plt.gcf().axes[idx] + tfm = ax.transAxes if normalized else ax.transData + t = ax.text( + *pos, + text, + fontsize=fs, + ha=ha, + va=va, + color=color, + transform=tfm, + clip_on=True, + zorder=zorder, + ) + if lcolor is not None: + t.set_path_effects( + [ + path_effects.Stroke(linewidth=lwidth, foreground=lcolor), + path_effects.Normal(), + ] + ) + + +def save_plot(path, **kw): + """Save the current figure without any white margin.""" + plt.savefig(path, bbox_inches="tight", pad_inches=0, **kw) + + +def features_to_RGB(*Fs, masks=None, skip=1): + """Project a list of d-dimensional feature maps to RGB colors using PCA.""" + from sklearn.decomposition import PCA + + def normalize(x): + return x / np.linalg.norm(x, axis=-1, keepdims=True) + + if masks is not None: + assert len(Fs) == len(masks) + + flatten = [] + for i, F in enumerate(Fs): + c, h, w = F.shape + F = np.rollaxis(F, 0, 3) + F_flat = F.reshape(-1, c) + if masks is not None and masks[i] is not None: + mask = masks[i] + assert mask.shape == F.shape[:2] + F_flat = F_flat[mask.reshape(-1)] + flatten.append(F_flat) + flatten = np.concatenate(flatten, axis=0) + flatten = normalize(flatten) + + pca = PCA(n_components=3) + if skip > 1: + pca.fit(flatten[::skip]) + flatten = pca.transform(flatten) + else: + flatten = pca.fit_transform(flatten) + flatten = (normalize(flatten) + 1) / 2 + + Fs_rgb = [] + for i, F in enumerate(Fs): + h, w = F.shape[-2:] + if masks is None or masks[i] is None: + F_rgb, flatten = np.split(flatten, [h * w], axis=0) + F_rgb = F_rgb.reshape((h, w, 3)) + else: + F_rgb = np.zeros((h, w, 3)) + indices = np.where(masks[i]) + F_rgb[indices], flatten = np.split(flatten, [len(indices[0])], axis=0) + F_rgb = np.concatenate([F_rgb, masks[i][..., None]], axis=-1) + Fs_rgb.append(F_rgb) + assert flatten.shape[0] == 0, flatten.shape + return Fs_rgb diff --git a/utils/viz_localization.py b/utils/viz_localization.py new file mode 100644 index 0000000000000000000000000000000000000000..3e15da2329ae2956c4eb663dfcd3a0cf841f2b84 --- /dev/null +++ b/utils/viz_localization.py @@ -0,0 +1,160 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. 
+ +import copy + +import matplotlib as mpl +import matplotlib.pyplot as plt +import numpy as np +import torch + + +def likelihood_overlay( + prob, map_viz=None, p_rgb=0.2, p_alpha=1 / 15, thresh=None, cmap="jet" +): + prob = prob / prob.max() + cmap = plt.get_cmap(cmap) + rgb = cmap(prob**p_rgb) + alpha = prob[..., None] ** p_alpha + if thresh is not None: + alpha[prob <= thresh] = 0 + if map_viz is not None: + faded = map_viz + (1 - map_viz) * 0.5 + rgb = rgb[..., :3] * alpha + faded * (1 - alpha) + rgb = np.clip(rgb, 0, 1) + else: + rgb[..., -1] = alpha.squeeze(-1) + return rgb + + +def heatmap2rgb(scores, mask=None, clip_min=0.05, alpha=0.8, cmap="jet"): + min_, max_ = np.quantile(scores, [clip_min, 1]) + scores = scores.clip(min=min_) + rgb = plt.get_cmap(cmap)((scores - min_) / (max_ - min_)) + if mask is not None: + if alpha == 0: + rgb[mask] = np.nan + else: + rgb[..., -1] = 1 - (1 - 1.0 * mask) * (1 - alpha) + return rgb + + +def plot_pose(axs, xy, yaw=None, s=1 / 35, c="r", a=1, w=0.015, dot=True, zorder=10): + if yaw is not None: + yaw = np.deg2rad(yaw) + uv = np.array([np.sin(yaw), -np.cos(yaw)]) + xy = np.array(xy) + 0.5 + if not isinstance(axs, list): + axs = [axs] + for ax in axs: + if isinstance(ax, int): + ax = plt.gcf().axes[ax] + if dot: + ax.scatter(*xy, c=c, s=70, zorder=zorder, linewidths=0, alpha=a) + if yaw is not None: + ax.quiver( + *xy, + *uv, + scale=s, + scale_units="xy", + angles="xy", + color=c, + zorder=zorder, + alpha=a, + width=w, + ) + + +def plot_dense_rotations( + ax, prob, thresh=0.01, skip=10, s=1 / 15, k=3, c="k", w=None, **kwargs +): + t = torch.argmax(prob, -1) + yaws = t.numpy() / prob.shape[-1] * 360 + prob = prob.max(-1).values / prob.max() + mask = prob > thresh + masked = prob.masked_fill(~mask, 0) + max_ = torch.nn.functional.max_pool2d( + masked.float()[None, None], k, stride=1, padding=k // 2 + ) + mask = (max_[0, 0] == masked.float()) & mask + indices = np.where(mask.numpy() > 0) + plot_pose( + ax, + indices[::-1], + yaws[indices], + s=s, + c=c, + dot=False, + zorder=0.1, + w=w, + **kwargs, + ) + + +def copy_image(im, ax): + prop = im.properties() + prop.pop("children") + prop.pop("size") + prop.pop("tightbbox") + prop.pop("transformed_clip_path_and_affine") + prop.pop("window_extent") + prop.pop("figure") + prop.pop("transform") + return ax.imshow(im.get_array(), **prop) + + +def add_circle_inset( + ax, + center, + corner=None, + radius_px=10, + inset_size=0.4, + inset_offset=0.005, + color="red", +): + data_t_axes = ax.transAxes + ax.transData.inverted() + if corner is None: + center_axes = np.array(data_t_axes.inverted().transform(center)) + corner = 1 - np.round(center_axes).astype(int) + corner = np.array(corner) + bottom_left = corner * (1 - inset_size - inset_offset) + (1 - corner) * inset_offset + axins = ax.inset_axes([*bottom_left, inset_size, inset_size]) + if ax.yaxis_inverted(): + axins.invert_yaxis() + axins.set_axis_off() + + c = mpl.patches.Circle(center, radius_px, fill=False, color=color) + c1 = mpl.patches.Circle(center, radius_px, fill=False, color=color) + # ax.add_patch(c) + ax.add_patch(c1) + # ax.add_patch(c.frozen()) + axins.add_patch(c) + + radius_inset = radius_px + 1 + axins.set_xlim([center[0] - radius_inset, center[0] + radius_inset]) + ylim = center[1] - radius_inset, center[1] + radius_inset + if axins.yaxis_inverted(): + ylim = ylim[::-1] + axins.set_ylim(ylim) + + for im in ax.images: + im2 = copy_image(im, axins) + im2.set_clip_path(c) + return axins + + +def plot_bev(bev, uv, yaw, ax=None, zorder=10, 
**kwargs): + if ax is None: + ax = plt.gca() + h, w = bev.shape[:2] + tfm = mpl.transforms.Affine2D().translate(-w / 2, -h) + tfm = tfm.rotate_deg(yaw).translate(*uv + 0.5) + tfm += plt.gca().transData + ax.imshow(bev, transform=tfm, zorder=zorder, **kwargs) + ax.plot( + [0, w - 1, w / 2, 0], + [0, 0, h - 0.5, 0], + transform=tfm, + c="k", + lw=1, + zorder=zorder + 1, + ) diff --git a/utils/wrappers.py b/utils/wrappers.py new file mode 100644 index 0000000000000000000000000000000000000000..4d8b35a35fdc83569e943735e7433ed72da1343e --- /dev/null +++ b/utils/wrappers.py @@ -0,0 +1,342 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. + +# Adapted from PixLoc, Paul-Edouard Sarlin, ETH Zurich +# https://github.com/cvg/pixloc +# Released under the Apache License 2.0 + +""" +Convenience classes for an SE3 pose and a pinhole Camera with lens distortion. +Based on PyTorch tensors: differentiable, batched, with GPU support. +""" + +import functools +import inspect +import math +from typing import Dict, List, NamedTuple, Tuple, Union + +import numpy as np +import torch + +from .geometry import undistort_points + + +def autocast(func): + """Cast the inputs of a TensorWrapper method to PyTorch tensors + if they are numpy arrays. Use the device and dtype of the wrapper. + """ + + @functools.wraps(func) + def wrap(self, *args): + device = torch.device("cpu") + dtype = None + if isinstance(self, TensorWrapper): + if self._data is not None: + device = self.device + dtype = self.dtype + elif not inspect.isclass(self) or not issubclass(self, TensorWrapper): + raise ValueError(self) + + cast_args = [] + for arg in args: + if isinstance(arg, np.ndarray): + arg = torch.from_numpy(arg) + arg = arg.to(device=device, dtype=dtype) + cast_args.append(arg) + return func(self, *cast_args) + + return wrap + + +class TensorWrapper: + _data = None + + @autocast + def __init__(self, data: torch.Tensor): + self._data = data + + @property + def shape(self): + return self._data.shape[:-1] + + @property + def device(self): + return self._data.device + + @property + def dtype(self): + return self._data.dtype + + def __getitem__(self, index): + return self.__class__(self._data[index]) + + def __setitem__(self, index, item): + self._data[index] = item.data + + def to(self, *args, **kwargs): + return self.__class__(self._data.to(*args, **kwargs)) + + def cpu(self): + return self.__class__(self._data.cpu()) + + def cuda(self): + return self.__class__(self._data.cuda()) + + def pin_memory(self): + return self.__class__(self._data.pin_memory()) + + def float(self): + return self.__class__(self._data.float()) + + def double(self): + return self.__class__(self._data.double()) + + def detach(self): + return self.__class__(self._data.detach()) + + @classmethod + def stack(cls, objects: List, dim=0, *, out=None): + data = torch.stack([obj._data for obj in objects], dim=dim, out=out) + return cls(data) + + @classmethod + def __torch_function__(cls, func, types, args=(), kwargs=None): + if kwargs is None: + kwargs = {} + if func is torch.stack: + return cls.stack(*args, **kwargs) + else: + return NotImplemented + + +class Pose(TensorWrapper): + def __init__(self, data: torch.Tensor): + assert data.shape[-1] == 12 + super().__init__(data) + + @classmethod + @autocast + def from_Rt(cls, R: torch.Tensor, t: torch.Tensor): + """Pose from a rotation matrix and translation vector. + Accepts numpy arrays or PyTorch tensors. + + Args: + R: rotation matrix with shape (..., 3, 3). + t: translation vector with shape (..., 3). 
+ """ + assert R.shape[-2:] == (3, 3) + assert t.shape[-1] == 3 + assert R.shape[:-2] == t.shape[:-1] + data = torch.cat([R.flatten(start_dim=-2), t], -1) + return cls(data) + + @classmethod + def from_4x4mat(cls, T: torch.Tensor): + """Pose from an SE(3) transformation matrix. + Args: + T: transformation matrix with shape (..., 4, 4). + """ + assert T.shape[-2:] == (4, 4) + R, t = T[..., :3, :3], T[..., :3, 3] + return cls.from_Rt(R, t) + + @classmethod + def from_colmap(cls, image: NamedTuple): + """Pose from a COLMAP Image.""" + return cls.from_Rt(image.qvec2rotmat(), image.tvec) + + @property + def R(self) -> torch.Tensor: + """Underlying rotation matrix with shape (..., 3, 3).""" + rvec = self._data[..., :9] + return rvec.reshape(rvec.shape[:-1] + (3, 3)) + + @property + def t(self) -> torch.Tensor: + """Underlying translation vector with shape (..., 3).""" + return self._data[..., -3:] + + def inv(self) -> "Pose": + """Invert an SE(3) pose.""" + R = self.R.transpose(-1, -2) + t = -(R @ self.t.unsqueeze(-1)).squeeze(-1) + return self.__class__.from_Rt(R, t) + + def compose(self, other: "Pose") -> "Pose": + """Chain two SE(3) poses: T_B2C.compose(T_A2B) -> T_A2C.""" + R = self.R @ other.R + t = self.t + (self.R @ other.t.unsqueeze(-1)).squeeze(-1) + return self.__class__.from_Rt(R, t) + + @autocast + def transform(self, p3d: torch.Tensor) -> torch.Tensor: + """Transform a set of 3D points. + Args: + p3d: 3D points, numpy array or PyTorch tensor with shape (..., 3). + """ + assert p3d.shape[-1] == 3 + # assert p3d.shape[:-2] == self.shape # allow broadcasting + return p3d @ self.R.transpose(-1, -2) + self.t.unsqueeze(-2) + + def __matmul__( + self, other: Union["Pose", torch.Tensor] + ) -> Union["Pose", torch.Tensor]: + """Transform a set of 3D points: T_A2B * p3D_A -> p3D_B. + or chain two SE(3) poses: T_B2C @ T_A2B -> T_A2C.""" + if isinstance(other, self.__class__): + return self.compose(other) + else: + return self.transform(other) + + def numpy(self) -> Tuple[np.ndarray]: + return self.R.numpy(), self.t.numpy() + + def magnitude(self) -> Tuple[torch.Tensor]: + """Magnitude of the SE(3) transformation. + Returns: + dr: rotation anngle in degrees. + dt: translation distance in meters. + """ + trace = torch.diagonal(self.R, dim1=-1, dim2=-2).sum(-1) + cos = torch.clamp((trace - 1) / 2, -1, 1) + dr = torch.acos(cos).abs() / math.pi * 180 + dt = torch.norm(self.t, dim=-1) + return dr, dt + + def __repr__(self): + return f"Pose: {self.shape} {self.dtype} {self.device}" + + +class Camera(TensorWrapper): + eps = 1e-4 + + def __init__(self, data: torch.Tensor): + assert data.shape[-1] in {6, 8, 10} + super().__init__(data) + + @classmethod + def from_dict(cls, camera: Union[Dict, NamedTuple]): + """Camera from a COLMAP Camera tuple or dictionary. + We assume that the origin (0, 0) is the center of the top-left pixel. + This is different from COLMAP. 
+ """ + if isinstance(camera, tuple): + camera = camera._asdict() + + model = camera["model"] + params = camera["params"] + + if model in ["OPENCV", "PINHOLE"]: + (fx, fy, cx, cy), params = np.split(params, [4]) + elif model in ["SIMPLE_PINHOLE", "SIMPLE_RADIAL", "RADIAL"]: + (f, cx, cy), params = np.split(params, [3]) + fx = fy = f + if model == "SIMPLE_RADIAL": + params = np.r_[params, 0.0] + else: + raise NotImplementedError(model) + + data = np.r_[ + camera["width"], camera["height"], fx, fy, cx - 0.5, cy - 0.5, params + ] + return cls(data) + + @property + def size(self) -> torch.Tensor: + """Size (width height) of the images, with shape (..., 2).""" + return self._data[..., :2] + + @property + def f(self) -> torch.Tensor: + """Focal lengths (fx, fy) with shape (..., 2).""" + return self._data[..., 2:4] + + @property + def c(self) -> torch.Tensor: + """Principal points (cx, cy) with shape (..., 2).""" + return self._data[..., 4:6] + + @property + def dist(self) -> torch.Tensor: + """Distortion parameters, with shape (..., {0, 2, 4}).""" + return self._data[..., 6:] + + def scale(self, scales: Union[float, int, Tuple[Union[float, int]]]): + """Update the camera parameters after resizing an image.""" + if isinstance(scales, (int, float)): + scales = (scales, scales) + s = self._data.new_tensor(scales) + data = torch.cat( + [self.size * s, self.f * s, (self.c + 0.5) * s - 0.5, self.dist], -1 + ) + return self.__class__(data) + + def crop(self, left_top: Tuple[float], size: Tuple[int]): + """Update the camera parameters after cropping an image.""" + left_top = self._data.new_tensor(left_top) + size = self._data.new_tensor(size) + data = torch.cat([size, self.f, self.c - left_top, self.dist], -1) + return self.__class__(data) + + @autocast + def in_image(self, p2d: torch.Tensor): + """Check if 2D points are within the image boundaries.""" + assert p2d.shape[-1] == 2 + # assert p2d.shape[:-2] == self.shape # allow broadcasting + size = self.size.unsqueeze(-2) + valid = torch.all((p2d >= 0) & (p2d <= (size - 1)), -1) + return valid + + @autocast + def project(self, p3d: torch.Tensor) -> Tuple[torch.Tensor]: + """Project 3D points into the camera plane and check for visibility.""" + z = p3d[..., -1] + valid = z > self.eps + z = z.clamp(min=self.eps) + p2d = p3d[..., :-1] / z.unsqueeze(-1) + return p2d, valid + + def J_project(self, p3d: torch.Tensor): + x, y, z = p3d[..., 0], p3d[..., 1], p3d[..., 2] + zero = torch.zeros_like(z) + J = torch.stack([1 / z, zero, -x / z**2, zero, 1 / z, -y / z**2], dim=-1) + J = J.reshape(p3d.shape[:-1] + (2, 3)) + return J # N x 2 x 3 + + @autocast + def undistort(self, pts: torch.Tensor) -> Tuple[torch.Tensor]: + """Undistort normalized 2D coordinates + and check for validity of the distortion model. 
+ """ + assert pts.shape[-1] == 2 + # assert pts.shape[:-2] == self.shape # allow broadcasting + return undistort_points(pts, self.dist) + + @autocast + def denormalize(self, p2d: torch.Tensor) -> torch.Tensor: + """Convert normalized 2D coordinates into pixel coordinates.""" + return p2d * self.f.unsqueeze(-2) + self.c.unsqueeze(-2) + + @autocast + def normalize(self, p2d: torch.Tensor) -> torch.Tensor: + """Convert pixel coordinates into normalized 2D coordinates.""" + return (p2d - self.c.unsqueeze(-2)) / self.f.unsqueeze(-2) + + def J_denormalize(self): + return torch.diag_embed(self.f).unsqueeze(-3) # 1 x 2 x 2 + + @autocast + def world2image(self, p3d: torch.Tensor) -> Tuple[torch.Tensor]: + """Transform 3D points into 2D pixel coordinates.""" + p2d, visible = self.project(p3d) + p2d, mask = self.undistort(p2d) + p2d = self.denormalize(p2d) + valid = visible & mask & self.in_image(p2d) + return p2d, valid + + def J_world2image(self, p3d: torch.Tensor): + p2d_dist, valid = self.project(p3d) + J = self.J_denormalize() @ self.J_undistort(p2d_dist) @ self.J_project(p3d) + return J, valid + + def __repr__(self): + return f"Camera {self.shape} {self.dtype} {self.device}"