diff --git a/conf/maplocnet.yaml b/conf/maplocnet.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b900ae970c6b905ff2c826293a17b054fd01b6fa --- /dev/null +++ b/conf/maplocnet.yaml @@ -0,0 +1,105 @@ +data: + root: '/root/autodl-fs/DATASET/MapLocNetDataset/UAV/' + train_citys: + - Paris + - Berlin + - London + - Tokyo + - NewYork + val_citys: +# - Taipei +# - LosAngeles +# - Singapore + - SanFrancisco + test_citys: + - SanFrancisco + image_size: 256 + train: + batch_size: 12 + num_workers: 4 + val: + batch_size: ${..train.batch_size} + num_workers: ${.batch_size} + num_classes: + areas: 7 + ways: 10 + nodes: 33 + pixel_per_meter: 1 + crop_size_meters: 64 + max_init_error: 48 + add_map_mask: true + resize_image: 512 + pad_to_square: true + rectify_pitch: true + augmentation: + rot90: true + flip: true + image: + apply: true + brightness: 0.5 + contrast: 0.4 + saturation: 0.4 + hue": 0.5/3.14 +model: + image_size: ${data.image_size} + latent_dim: 128 + val_citys: ${data.val_citys} + image_encoder: + name: feature_extractor_v2 + backbone: + encoder: resnet50 + pretrained: true + output_dim: 8 + num_downsample: null + remove_stride_from_first_conv: false + name: maplocnet + matching_dim: 8 + z_max: 32 + x_max: 32 + pixel_per_meter: 1 + num_scale_bins: 33 + num_rotations: 64 + map_encoder: + embedding_dim: 16 + output_dim: 8 + num_classes: + areas: 7 + ways: 10 + nodes: 33 + backbone: + encoder: vgg19 + pretrained: false + output_scales: + - 0 + num_downsample: 3 + decoder: + - 128 + - 64 + - 64 + padding: replicate + unary_prior: false + bev_net: + num_blocks: 4 + latent_dim: 128 + output_dim: 8 + confidence: true +experiment: + name: maplocanet_0526_re + gpus: 2 + seed: 0 +training: + lr: 0.0001 + lr_scheduler: null + finetune_from_checkpoint: null + trainer: + val_check_interval: 1000 + log_every_n_steps: 100 +# limit_val_batches: 1000 + max_steps: 200000 + devices: ${experiment.gpus} + checkpointing: + monitor: "loss/total/val" + save_top_k: 10 + mode: min + +# filename: '{epoch}-{step}-{loss_SanFrancisco:.2f}' \ No newline at end of file diff --git a/conf/maplocnetsingle-101.yaml b/conf/maplocnetsingle-101.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3dfb29c2f3ffc7790681a7c2866d47b68828a3f6 --- /dev/null +++ b/conf/maplocnetsingle-101.yaml @@ -0,0 +1,105 @@ +data: + root: '/root/autodl-fs/DATASET/MapLocNetDataset/UAV/' + train_citys: + - Paris + - Berlin + - London + - Tokyo + - NewYork + val_citys: +# - Taipei +# - LosAngeles +# - Singapore + - SanFrancisco + test_citys: + - SanFrancisco + image_size: 256 + train: + batch_size: 12 + num_workers: 4 + val: + batch_size: ${..train.batch_size} + num_workers: ${.batch_size} + num_classes: + areas: 7 + ways: 10 + nodes: 33 + pixel_per_meter: 1 + crop_size_meters: 64 + max_init_error: 48 + add_map_mask: true + resize_image: 512 + pad_to_square: true + rectify_pitch: true + augmentation: + rot90: true + flip: true + image: + apply: True + brightness: 0.5 + contrast: 0.4 + saturation: 0.4 + hue": 0.5/3.14 +model: + image_size: ${data.image_size} + latent_dim: 128 + val_citys: ${data.val_citys} + image_encoder: + name: feature_extractor_v2 + backbone: + encoder: resnet101 + pretrained: true + output_dim: 8 + num_downsample: null + remove_stride_from_first_conv: false + name: maplocnet + matching_dim: 8 + z_max: 32 + x_max: 32 + pixel_per_meter: 1 + num_scale_bins: 33 + num_rotations: 64 + map_encoder: + embedding_dim: 48 + output_dim: 8 + num_classes: + areas: 7 + ways: 10 + nodes: 33 + backbone: 
+ encoder: vgg19 + pretrained: false + output_scales: + - 0 + num_downsample: 3 + decoder: + - 128 + - 64 + - 64 + padding: replicate + unary_prior: false + bev_net: + num_blocks: 4 + latent_dim: 128 + output_dim: 8 + confidence: true +experiment: + name: maplocanet_523_single_A100_no_mutil_scale_augmentation_resnet101_nosingle + gpus: 2 + seed: 0 +training: + lr: 0.0001 + lr_scheduler: null + finetune_from_checkpoint: null + trainer: + val_check_interval: 1000 + log_every_n_steps: 100 +# limit_val_batches: 1000 + max_steps: 200000 + devices: ${experiment.gpus} + checkpointing: + monitor: "val/xy_recall_1m" + save_top_k: 10 + mode: min + +# filename: '{epoch}-{step}-{loss_SanFrancisco:.2f}' \ No newline at end of file diff --git a/conf/maplocnetsingle.yaml b/conf/maplocnetsingle.yaml new file mode 100644 index 0000000000000000000000000000000000000000..de237a27e841a36b3a60475441a72f5286a0362c --- /dev/null +++ b/conf/maplocnetsingle.yaml @@ -0,0 +1,105 @@ +data: + root: '/root/autodl-fs/DATASET/MapLocNetDataset/UAV/' + train_citys: + - Paris + - Berlin + - London + - Tokyo + - NewYork + val_citys: +# - Taipei +# - LosAngeles +# - Singapore + - SanFrancisco + test_citys: + - SanFrancisco + image_size: 256 + train: + batch_size: 12 + num_workers: 4 + val: + batch_size: ${..train.batch_size} + num_workers: ${.batch_size} + num_classes: + areas: 7 + ways: 10 + nodes: 33 + pixel_per_meter: 1 + crop_size_meters: 64 + max_init_error: 48 + add_map_mask: true + resize_image: 512 + pad_to_square: true + rectify_pitch: true + augmentation: + rot90: true + flip: true + image: + apply: True + brightness: 0.5 + contrast: 0.4 + saturation: 0.4 + hue": 0.5/3.14 +model: + image_size: ${data.image_size} + latent_dim: 128 + val_citys: ${data.val_citys} + image_encoder: + name: feature_extractor_v2 + backbone: + encoder: resnet101 + pretrained: true + output_dim: 8 + num_downsample: null + remove_stride_from_first_conv: false + name: maplocnet + matching_dim: 8 + z_max: 32 + x_max: 32 + pixel_per_meter: 1 + num_scale_bins: 33 + num_rotations: 64 + map_encoder: + embedding_dim: 48 + output_dim: 8 + num_classes: + all: 50 + # ways: 10 + # nodes: 33 + backbone: + encoder: vgg19 + pretrained: false + output_scales: + - 0 + num_downsample: 3 + decoder: + - 128 + - 64 + - 64 + padding: replicate + unary_prior: false + bev_net: + num_blocks: 4 + latent_dim: 128 + output_dim: 8 + confidence: true +experiment: + name: maplocanet_523_single_A100_no_mutil_scale_augmentation_resnet101_2 + gpus: 2 + seed: 0 +training: + lr: 0.0001 + lr_scheduler: null + finetune_from_checkpoint: null + trainer: + val_check_interval: 1000 + log_every_n_steps: 100 +# limit_val_batches: 1000 + max_steps: 200000 + devices: ${experiment.gpus} + checkpointing: + monitor: "val/xy_recall_1m" + save_top_k: 10 + mode: min + +# filename: '{epoch}-{step}-{loss_SanFrancisco:.2f}' \ No newline at end of file diff --git a/conf/maplocnetsingle0526.yaml b/conf/maplocnetsingle0526.yaml new file mode 100644 index 0000000000000000000000000000000000000000..207741912ee75df3502605906b3ccc6c5c62e9ad --- /dev/null +++ b/conf/maplocnetsingle0526.yaml @@ -0,0 +1,105 @@ +data: + root: '/root/autodl-fs/DATASET/MapLocNetDataset/UAV/' + train_citys: + - Paris + - Berlin + - London + - Tokyo + - NewYork + val_citys: +# - Taipei +# - LosAngeles +# - Singapore + - SanFrancisco + test_citys: + - SanFrancisco + image_size: 256 + train: + batch_size: 12 + num_workers: 4 + val: + batch_size: ${..train.batch_size} + num_workers: ${.batch_size} + num_classes: + areas: 7 + ways: 
10 + nodes: 33 + pixel_per_meter: 1 + crop_size_meters: 64 + max_init_error: 48 + add_map_mask: true + resize_image: 512 + pad_to_square: true + rectify_pitch: true + augmentation: + rot90: true + flip: true + image: + apply: false + brightness: 0.5 + contrast: 0.4 + saturation: 0.4 + hue": 0.5/3.14 +model: + image_size: ${data.image_size} + latent_dim: 128 + val_citys: ${data.val_citys} + image_encoder: + name: feature_extractor_v2 + backbone: + encoder: resnet50 + pretrained: true + output_dim: 8 + num_downsample: null + remove_stride_from_first_conv: false + name: maplocnet + matching_dim: 8 + z_max: 32 + x_max: 32 + pixel_per_meter: 1 + num_scale_bins: 33 + num_rotations: 64 + map_encoder: + embedding_dim: 48 + output_dim: 8 + num_classes: + all: 50 + # ways: 10 + # nodes: 33 + backbone: + encoder: vgg19 + pretrained: false + output_scales: + - 0 + num_downsample: 3 + decoder: + - 128 + - 64 + - 64 + padding: replicate + unary_prior: false + bev_net: + num_blocks: 4 + latent_dim: 128 + output_dim: 8 + confidence: true +experiment: + name: maplocanet_523_single_A100_no_mutil_scale + gpus: 2 + seed: 0 +training: + lr: 0.0001 + lr_scheduler: null + finetune_from_checkpoint: null + trainer: + val_check_interval: 1000 + log_every_n_steps: 100 +# limit_val_batches: 1000 + max_steps: 200000 + devices: ${experiment.gpus} + checkpointing: + monitor: "val/xy_recall_1m" + save_top_k: 10 + mode: min + +# filename: '{epoch}-{step}-{loss_SanFrancisco:.2f}' \ No newline at end of file diff --git a/conf/maplocnetsingleunet.yaml b/conf/maplocnetsingleunet.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1744f7cfe38f4e0854d1fd24440910fc4dc2a055 --- /dev/null +++ b/conf/maplocnetsingleunet.yaml @@ -0,0 +1,105 @@ +data: + root: '/root/autodl-fs/DATASET/MapLocNetDataset/UAV/' + train_citys: + - Paris + - Berlin + - London + - Tokyo + - NewYork + val_citys: +# - Taipei +# - LosAngeles +# - Singapore + - SanFrancisco + test_citys: + - SanFrancisco + image_size: 256 + train: + batch_size: 12 + num_workers: 4 + val: + batch_size: ${..train.batch_size} + num_workers: ${.batch_size} + num_classes: + areas: 7 + ways: 10 + nodes: 33 + pixel_per_meter: 1 + crop_size_meters: 64 + max_init_error: 48 + add_map_mask: true + resize_image: 512 + pad_to_square: true + rectify_pitch: true + augmentation: + rot90: true + flip: true + image: + apply: True + brightness: 0.5 + contrast: 0.4 + saturation: 0.4 + hue": 0.5/3.14 +model: + image_size: ${data.image_size} + latent_dim: 128 + val_citys: ${data.val_citys} + image_encoder: + name: feature_extractor_v3 + backbone: +# encoder: resnet101 +# pretrained: true + output_dim: 8 +# num_downsample: null +# remove_stride_from_first_conv: false + name: maplocnet + matching_dim: 8 + z_max: 32 + x_max: 32 + pixel_per_meter: 1 + num_scale_bins: 33 + num_rotations: 64 + map_encoder: + embedding_dim: 48 + output_dim: 8 + num_classes: + all: 50 + # ways: 10 + # nodes: 33 + backbone: + encoder: vgg19 + pretrained: false + output_scales: + - 0 + num_downsample: 3 + decoder: + - 128 + - 64 + - 64 + padding: replicate + unary_prior: false + bev_net: + num_blocks: 4 + latent_dim: 128 + output_dim: 8 + confidence: true +experiment: + name: maplocanet_601_unet + gpus: 2 + seed: 0 +training: + lr: 0.0001 + lr_scheduler: null + finetune_from_checkpoint: null + trainer: + val_check_interval: 1000 + log_every_n_steps: 100 +# limit_val_batches: 1000 + max_steps: 200000 + devices: ${experiment.gpus} + checkpointing: + monitor: "val/xy_recall_1m" + save_top_k: 10 + mode: min + 
+# filename: '{epoch}-{step}-{loss_SanFrancisco:.2f}' \ No newline at end of file diff --git a/conf/maplocnetsinglhub_DDRNet.yaml b/conf/maplocnetsinglhub_DDRNet.yaml new file mode 100644 index 0000000000000000000000000000000000000000..87b66a5494f310f9a61f2f0b57f437abde85246a --- /dev/null +++ b/conf/maplocnetsinglhub_DDRNet.yaml @@ -0,0 +1,107 @@ +data: + root: '/root/autodl-fs/DATASET/MapLocNetDataset/UAV/' + train_citys: + - Paris + - Berlin + - London + - Tokyo + - NewYork + val_citys: +# - Taipei +# - LosAngeles +# - Singapore + - SanFrancisco + test_citys: + - SanFrancisco + image_size: 256 + train: + batch_size: 12 + num_workers: 4 + val: + batch_size: ${..train.batch_size} + num_workers: ${.batch_size} + num_classes: + areas: 7 + ways: 10 + nodes: 33 + pixel_per_meter: 1 + crop_size_meters: 64 + max_init_error: 48 + add_map_mask: true + resize_image: 512 + pad_to_square: true + rectify_pitch: true + augmentation: + rot90: true + flip: true + image: + apply: True + brightness: 0.5 + contrast: 0.4 + saturation: 0.4 + hue": 0.5/3.14 +model: + image_size: ${data.image_size} + latent_dim: 128 + val_citys: ${data.val_citys} + image_encoder: + name: feature_extractor_v5 + architecture: DDRNet23s + backbone: +# encoder: resnet50 +# pretrained: true + output_dim: 8 +# upsampling: 2 +# num_downsample: null +# remove_stride_from_first_conv: false + name: maplocnet + matching_dim: 8 + z_max: 32 + x_max: 32 + pixel_per_meter: 1 + num_scale_bins: 33 + num_rotations: 64 + map_encoder: + embedding_dim: 48 + output_dim: 8 + num_classes: + all: 50 + # ways: 10 + # nodes: 33 + backbone: + encoder: vgg19 + pretrained: false + output_scales: + - 0 + num_downsample: 3 + decoder: + - 128 + - 64 + - 64 + padding: replicate + unary_prior: false + bev_net: + num_blocks: 4 + latent_dim: 128 + output_dim: 8 + confidence: true +experiment: + name: maplocanet_602_hub_DDRnet + gpus: 2 + seed: 0 +training: + lr: 0.0001 + lr_scheduler: null + finetune_from_checkpoint: null + trainer: + val_check_interval: 1000 + log_every_n_steps: 100 +# limit_val_batches: 1000 + max_steps: 200000 + devices: ${experiment.gpus} + checkpointing: + monitor: "val/xy_recall_1m" + save_top_k: 5 + mode: max + +# filename: '{epoch}-{step}-{loss_SanFrancisco:.2f}' \ No newline at end of file diff --git a/conf/maplocnetsinglhub_FPN-resnet18WeightedEmbedding.yaml b/conf/maplocnetsinglhub_FPN-resnet18WeightedEmbedding.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ddd598b688454fb4f8a0adce8802799c9b95aea1 --- /dev/null +++ b/conf/maplocnetsinglhub_FPN-resnet18WeightedEmbedding.yaml @@ -0,0 +1,112 @@ +data: + root: '/root/autodl-fs/DATASET/MapLocNetDataset/UAV/' + train_citys: + - Paris + - Berlin + - London + - Tokyo + - NewYork + val_citys: +# - Taipei +# - LosAngeles +# - Singapore + - SanFrancisco + test_citys: + - SanFrancisco + image_size: 256 + train: + batch_size: 12 + num_workers: 4 + val: + batch_size: ${..train.batch_size} + num_workers: ${.batch_size} + num_classes: + areas: 7 + ways: 10 + nodes: 33 + pixel_per_meter: 1 + crop_size_meters: 64 + max_init_error: 48 + add_map_mask: true + resize_image: 512 + pad_to_square: true + rectify_pitch: true + augmentation: + rot90: false + flip: false + image: + apply: True + brightness: 0.5 + contrast: 0.4 + saturation: 0.4 + hue": 0.5/3.14 +model: + image_size: ${data.image_size} + latent_dim: 128 + val_citys: ${data.val_citys} + image_encoder: + name: feature_extractor_v4 + architecture: FPN + backbone: + encoder: resnet18 +# pretrained: true + output_dim: 8 +# 
upsampling: 2 +# num_downsample: null +# remove_stride_from_first_conv: false + name: maplocnet + matching_dim: 8 + z_max: 32 + x_max: 32 + pixel_per_meter: 1 + num_scale_bins: 33 + num_rotations: 64 + map_encoder: + embedding_dim: 48 + output_dim: 8 + weighted_embedding: ImprovedAttentionEmbedding + num_classes: + all: 50 + # ways: 10 + # nodes: 33 + backbone: + encoder: vgg19 + pretrained: false + output_scales: + - 0 + num_downsample: 3 + decoder: + - 128 + - 64 + - 64 + padding: replicate + unary_prior: false + bev_net: + num_blocks: 4 + latent_dim: 128 + output_dim: 8 + confidence: true +experiment: + name: maplocanet_602_hub_FPN_norelu_resnet18_ImprovedAttentionEmbedding + gpus: 5 + seed: 42 +training: + lr: 0.0001 + lr_scheduler: + name: StepLR + args: + step_size: 10 + gamma: 0.1 + finetune_from_checkpoint: null + trainer: + val_check_interval: 1000 + log_every_n_steps: 100 +# limit_val_batches: 1000 + max_steps: 300000 + devices: ${experiment.gpus} + checkpointing: + monitor: "val/xy_recall_1m" + save_top_k: 5 + mode: max + +# filename: '{epoch}-{step}-{loss_SanFrancisco:.2f}' \ No newline at end of file diff --git a/conf/maplocnetsinglhub_FPN-resnet34LightWeightedEmbedding.yaml b/conf/maplocnetsinglhub_FPN-resnet34LightWeightedEmbedding.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f6e944e54af3f70bdbddff1a981784e12d6e9d90 --- /dev/null +++ b/conf/maplocnetsinglhub_FPN-resnet34LightWeightedEmbedding.yaml @@ -0,0 +1,112 @@ +data: + root: '/root/autodl-fs/DATASET/MapLocNetDataset/UAV/' + train_citys: + - Paris + - Berlin + - London + - Tokyo + - NewYork + val_citys: +# - Taipei +# - LosAngeles +# - Singapore + - SanFrancisco + test_citys: + - SanFrancisco + image_size: 256 + train: + batch_size: 12 + num_workers: 4 + val: + batch_size: ${..train.batch_size} + num_workers: ${.batch_size} + num_classes: + areas: 7 + ways: 10 + nodes: 33 + pixel_per_meter: 1 + crop_size_meters: 64 + max_init_error: 48 + add_map_mask: true + resize_image: 512 + pad_to_square: true + rectify_pitch: true + augmentation: + rot90: false + flip: false + image: + apply: True + brightness: 0.5 + contrast: 0.4 + saturation: 0.4 + hue": 0.5/3.14 +model: + image_size: ${data.image_size} + latent_dim: 128 + val_citys: ${data.val_citys} + image_encoder: + name: feature_extractor_v4 + architecture: LightFPN + backbone: + encoder: resnet34 +# pretrained: true + output_dim: 8 +# upsampling: 2 +# num_downsample: null +# remove_stride_from_first_conv: false + name: maplocnet + matching_dim: 8 + z_max: 32 + x_max: 32 + pixel_per_meter: 1 + num_scale_bins: 33 + num_rotations: 64 + map_encoder: + embedding_dim: 48 + output_dim: 8 + weighted_embedding: ImprovedAttentionEmbedding + num_classes: + all: 50 + # ways: 10 + # nodes: 33 + backbone: + encoder: vgg19 + pretrained: false + output_scales: + - 0 + num_downsample: 3 + decoder: + - 128 + - 64 + - 64 + padding: replicate + unary_prior: false + bev_net: + num_blocks: 4 + latent_dim: 128 + output_dim: 8 + confidence: true +experiment: + name: maplocanet_602_hub_FPN_norelu_resnet34Light_ImprovedAttentionEmbedding + gpus: 5 + seed: 42 +training: + lr: 0.0001 + lr_scheduler: + name: StepLR + args: + step_size: 10 + gamma: 0.1 + finetune_from_checkpoint: null + trainer: + val_check_interval: 1000 + log_every_n_steps: 100 +# limit_val_batches: 1000 + max_steps: 300000 + devices: ${experiment.gpus} + checkpointing: + monitor: "val/xy_recall_1m" + save_top_k: 30 + mode: max + +# filename: '{epoch}-{step}-{loss_SanFrancisco:.2f}' \ No newline at end of 
file diff --git a/conf/maplocnetsinglhub_FPN-resnet34WeightedEmbedding.yaml b/conf/maplocnetsinglhub_FPN-resnet34WeightedEmbedding.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b1c24d02cb2420f0167ed4e1b2514bf429a2186d --- /dev/null +++ b/conf/maplocnetsinglhub_FPN-resnet34WeightedEmbedding.yaml @@ -0,0 +1,112 @@ +data: + root: '/root/autodl-fs/DATASET/MapLocNetDataset/UAV/' + train_citys: + - Paris + - Berlin + - London + - Tokyo + - NewYork + val_citys: +# - Taipei +# - LosAngeles +# - Singapore + - SanFrancisco + test_citys: + - SanFrancisco + image_size: 256 + train: + batch_size: 12 + num_workers: 4 + val: + batch_size: ${..train.batch_size} + num_workers: ${.batch_size} + num_classes: + areas: 7 + ways: 10 + nodes: 33 + pixel_per_meter: 1 + crop_size_meters: 64 + max_init_error: 48 + add_map_mask: true + resize_image: 512 + pad_to_square: true + rectify_pitch: true + augmentation: + rot90: false + flip: false + image: + apply: True + brightness: 0.5 + contrast: 0.4 + saturation: 0.4 + hue": 0.5/3.14 +model: + image_size: ${data.image_size} + latent_dim: 128 + val_citys: ${data.val_citys} + image_encoder: + name: feature_extractor_v4 + architecture: FPN + backbone: + encoder: resnet34 +# pretrained: true + output_dim: 8 +# upsampling: 2 +# num_downsample: null +# remove_stride_from_first_conv: false + name: maplocnet + matching_dim: 8 + z_max: 32 + x_max: 32 + pixel_per_meter: 1 + num_scale_bins: 33 + num_rotations: 64 + map_encoder: + embedding_dim: 48 + output_dim: 8 + weighted_embedding: ImprovedAttentionEmbedding + num_classes: + all: 50 + # ways: 10 + # nodes: 33 + backbone: + encoder: vgg19 + pretrained: false + output_scales: + - 0 + num_downsample: 3 + decoder: + - 128 + - 64 + - 64 + padding: replicate + unary_prior: false + bev_net: + num_blocks: 4 + latent_dim: 128 + output_dim: 8 + confidence: true +experiment: + name: maplocanet_602_hub_FPN_norelu_resnet34_ImprovedAttentionEmbedding + gpus: 5 + seed: 42 +training: + lr: 0.0001 + lr_scheduler: + name: StepLR + args: + step_size: 10 + gamma: 0.1 + finetune_from_checkpoint: null + trainer: + val_check_interval: 1000 + log_every_n_steps: 100 +# limit_val_batches: 1000 + max_steps: 300000 + devices: ${experiment.gpus} + checkpointing: + monitor: "val/xy_recall_1m" + save_top_k: 5 + mode: max + +# filename: '{epoch}-{step}-{loss_SanFrancisco:.2f}' \ No newline at end of file diff --git a/conf/maplocnetsinglhub_FPN-resnet50.yaml b/conf/maplocnetsinglhub_FPN-resnet50.yaml new file mode 100644 index 0000000000000000000000000000000000000000..03b7de5dfa0992a76135a3767232660c2782e081 --- /dev/null +++ b/conf/maplocnetsinglhub_FPN-resnet50.yaml @@ -0,0 +1,111 @@ +data: + root: '/root/autodl-fs/DATASET/MapLocNetDataset/UAV/' + train_citys: + - Paris + - Berlin + - London + - Tokyo + - NewYork + val_citys: +# - Taipei +# - LosAngeles +# - Singapore + - SanFrancisco + test_citys: + - SanFrancisco + image_size: 256 + train: + batch_size: 12 + num_workers: 4 + val: + batch_size: ${..train.batch_size} + num_workers: ${.batch_size} + num_classes: + areas: 7 + ways: 10 + nodes: 33 + pixel_per_meter: 1 + crop_size_meters: 64 + max_init_error: 48 + add_map_mask: true + resize_image: 512 + pad_to_square: true + rectify_pitch: true + augmentation: + rot90: false + flip: false + image: + apply: True + brightness: 0.5 + contrast: 0.4 + saturation: 0.4 + hue": 0.5/3.14 +model: + image_size: ${data.image_size} + latent_dim: 128 + val_citys: ${data.val_citys} + image_encoder: + name: feature_extractor_v4 + architecture: FPN 
+ backbone: + encoder: resnet50 +# pretrained: true + output_dim: 8 +# upsampling: 2 +# num_downsample: null +# remove_stride_from_first_conv: false + name: maplocnet + matching_dim: 8 + z_max: 32 + x_max: 32 + pixel_per_meter: 1 + num_scale_bins: 33 + num_rotations: 64 + map_encoder: + embedding_dim: 48 + output_dim: 8 + num_classes: + all: 50 + # ways: 10 + # nodes: 33 + backbone: + encoder: vgg19 + pretrained: false + output_scales: + - 0 + num_downsample: 3 + decoder: + - 128 + - 64 + - 64 + padding: replicate + unary_prior: false + bev_net: + num_blocks: 4 + latent_dim: 128 + output_dim: 8 + confidence: true +experiment: + name: maplocanet_602_hub_FPN_norelu_resnet50_temp + gpus: 2 + seed: 42 +training: + lr: 0.0001 + lr_scheduler: + name: StepLR + args: + step_size: 10 + gamma: 0.1 + finetune_from_checkpoint: null + trainer: + val_check_interval: 1000 + log_every_n_steps: 100 +# limit_val_batches: 1000 + max_steps: 300000 + devices: ${experiment.gpus} + checkpointing: + monitor: "val/xy_recall_1m" + save_top_k: 5 + mode: max + +# filename: '{epoch}-{step}-{loss_SanFrancisco:.2f}' \ No newline at end of file diff --git a/conf/maplocnetsinglhub_FPN-resnet50WeightedEmbedding.yaml b/conf/maplocnetsinglhub_FPN-resnet50WeightedEmbedding.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c6b12d18c3a68c318ec15b9b786bd153626fbcf0 --- /dev/null +++ b/conf/maplocnetsinglhub_FPN-resnet50WeightedEmbedding.yaml @@ -0,0 +1,112 @@ +data: + root: '/root/autodl-fs/DATASET/MapLocNetDataset/UAV/' + train_citys: + - Paris + - Berlin + - London + - Tokyo + - NewYork + val_citys: +# - Taipei +# - LosAngeles +# - Singapore + - SanFrancisco + test_citys: + - SanFrancisco + image_size: 256 + train: + batch_size: 12 + num_workers: 4 + val: + batch_size: ${..train.batch_size} + num_workers: ${.batch_size} + num_classes: + areas: 7 + ways: 10 + nodes: 33 + pixel_per_meter: 1 + crop_size_meters: 64 + max_init_error: 48 + add_map_mask: true + resize_image: 512 + pad_to_square: true + rectify_pitch: true + augmentation: + rot90: false + flip: false + image: + apply: True + brightness: 0.5 + contrast: 0.4 + saturation: 0.4 + hue": 0.5/3.14 +model: + image_size: ${data.image_size} + latent_dim: 128 + val_citys: ${data.val_citys} + image_encoder: + name: feature_extractor_v4 + architecture: FPN + backbone: + encoder: resnet50 +# pretrained: true + output_dim: 8 +# upsampling: 2 +# num_downsample: null +# remove_stride_from_first_conv: false + name: maplocnet + matching_dim: 8 + z_max: 32 + x_max: 32 + pixel_per_meter: 1 + num_scale_bins: 33 + num_rotations: 64 + map_encoder: + embedding_dim: 48 + output_dim: 8 + weighted_embedding: ImprovedAttentionEmbedding + num_classes: + all: 50 + # ways: 10 + # nodes: 33 + backbone: + encoder: vgg19 + pretrained: false + output_scales: + - 0 + num_downsample: 3 + decoder: + - 128 + - 64 + - 64 + padding: replicate + unary_prior: false + bev_net: + num_blocks: 4 + latent_dim: 128 + output_dim: 8 + confidence: true +experiment: + name: maplocanet_602_hub_FPN_norelu_resnet50_ImprovedAttentionEmbedding + gpus: 3 + seed: 42 +training: + lr: 0.0001 + lr_scheduler: + name: StepLR + args: + step_size: 10 + gamma: 0.1 + finetune_from_checkpoint: null + trainer: + val_check_interval: 1000 + log_every_n_steps: 100 +# limit_val_batches: 1000 + max_steps: 300000 + devices: ${experiment.gpus} + checkpointing: + monitor: "val/xy_recall_1m" + save_top_k: 5 + mode: max + +# filename: '{epoch}-{step}-{loss_SanFrancisco:.2f}' \ No newline at end of file diff --git 
a/conf/maplocnetsinglhub_FPN.yaml b/conf/maplocnetsinglhub_FPN.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7cb5481c408045c7d52d17e48eed4f75effef799 --- /dev/null +++ b/conf/maplocnetsinglhub_FPN.yaml @@ -0,0 +1,107 @@ +data: + root: '/root/autodl-fs/DATASET/MapLocNetDataset/UAV/' + train_citys: + - Paris + - Berlin + - London + - Tokyo + - NewYork + val_citys: +# - Taipei +# - LosAngeles +# - Singapore + - SanFrancisco + test_citys: + - SanFrancisco + image_size: 256 + train: + batch_size: 12 + num_workers: 4 + val: + batch_size: ${..train.batch_size} + num_workers: ${.batch_size} + num_classes: + areas: 7 + ways: 10 + nodes: 33 + pixel_per_meter: 1 + crop_size_meters: 64 + max_init_error: 48 + add_map_mask: true + resize_image: 512 + pad_to_square: true + rectify_pitch: true + augmentation: + rot90: true + flip: true + image: + apply: True + brightness: 0.5 + contrast: 0.4 + saturation: 0.4 + hue": 0.5/3.14 +model: + image_size: ${data.image_size} + latent_dim: 128 + val_citys: ${data.val_citys} + image_encoder: + name: feature_extractor_v4 + architecture: FPN + backbone: + encoder: resnet101 +# pretrained: true + output_dim: 8 +# upsampling: 2 +# num_downsample: null +# remove_stride_from_first_conv: false + name: maplocnet + matching_dim: 8 + z_max: 32 + x_max: 32 + pixel_per_meter: 1 + num_scale_bins: 33 + num_rotations: 64 + map_encoder: + embedding_dim: 48 + output_dim: 8 + num_classes: + all: 50 + # ways: 10 + # nodes: 33 + backbone: + encoder: vgg19 + pretrained: false + output_scales: + - 0 + num_downsample: 3 + decoder: + - 128 + - 64 + - 64 + padding: replicate + unary_prior: false + bev_net: + num_blocks: 4 + latent_dim: 128 + output_dim: 8 + confidence: true +experiment: + name: maplocanet_602_hub_FPN_Resnet50_norelu + gpus: 2 + seed: 0 +training: + lr: 0.0001 + lr_scheduler: null + finetune_from_checkpoint: null + trainer: + val_check_interval: 1000 + log_every_n_steps: 100 +# limit_val_batches: 1000 + max_steps: 200000 + devices: ${experiment.gpus} + checkpointing: + monitor: "val/xy_recall_1m" + save_top_k: 10 + mode: min + +# filename: '{epoch}-{step}-{loss_SanFrancisco:.2f}' \ No newline at end of file diff --git a/conf/maplocnetsinglhub_FPN_Mobileone.yaml b/conf/maplocnetsinglhub_FPN_Mobileone.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a355e2374fdaa9b42a97d794278d671631d21df5 --- /dev/null +++ b/conf/maplocnetsinglhub_FPN_Mobileone.yaml @@ -0,0 +1,107 @@ +data: + root: '/root/autodl-fs/DATASET/MapLocNetDataset/UAV/' + train_citys: + - Paris + - Berlin + - London + - Tokyo + - NewYork + val_citys: +# - Taipei +# - LosAngeles +# - Singapore + - SanFrancisco + test_citys: + - SanFrancisco + image_size: 256 + train: + batch_size: 12 + num_workers: 4 + val: + batch_size: ${..train.batch_size} + num_workers: ${.batch_size} + num_classes: + areas: 7 + ways: 10 + nodes: 33 + pixel_per_meter: 1 + crop_size_meters: 64 + max_init_error: 48 + add_map_mask: true + resize_image: 512 + pad_to_square: true + rectify_pitch: true + augmentation: + rot90: true + flip: true + image: + apply: True + brightness: 0.5 + contrast: 0.4 + saturation: 0.4 + hue": 0.5/3.14 +model: + image_size: ${data.image_size} + latent_dim: 128 + val_citys: ${data.val_citys} + image_encoder: + name: feature_extractor_v4 + architecture: FPN + backbone: + encoder: mobileone_s3 +# pretrained: true + output_dim: 8 +# upsampling: 2 +# num_downsample: null +# remove_stride_from_first_conv: false + name: maplocnet + matching_dim: 8 + z_max: 32 + x_max: 32 + 
pixel_per_meter: 1 + num_scale_bins: 33 + num_rotations: 64 + map_encoder: + embedding_dim: 48 + output_dim: 8 + num_classes: + all: 50 + # ways: 10 + # nodes: 33 + backbone: + encoder: vgg19 + pretrained: false + output_scales: + - 0 + num_downsample: 3 + decoder: + - 128 + - 64 + - 64 + padding: replicate + unary_prior: false + bev_net: + num_blocks: 4 + latent_dim: 128 + output_dim: 8 + confidence: true +experiment: + name: maplocnetsinglhub_FPN_mobileone_s3 + gpus: 2 + seed: 0 +training: + lr: 0.0001 + lr_scheduler: null + finetune_from_checkpoint: null + trainer: + val_check_interval: 1000 + log_every_n_steps: 100 +# limit_val_batches: 1000 + max_steps: 300000 + devices: ${experiment.gpus} + checkpointing: + monitor: "val/xy_recall_1m" + save_top_k: 10 + mode: max + +# filename: '{epoch}-{step}-{loss_SanFrancisco:.2f}' \ No newline at end of file diff --git a/conf/maplocnetsinglhub_PSP.yaml b/conf/maplocnetsinglhub_PSP.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1547128ac29c8dde636bf437351a5bd7a79a7490 --- /dev/null +++ b/conf/maplocnetsinglhub_PSP.yaml @@ -0,0 +1,107 @@ +data: + root: '/root/autodl-fs/DATASET/MapLocNetDataset/UAV/' + train_citys: + - Paris + - Berlin + - London + - Tokyo + - NewYork + val_citys: +# - Taipei +# - LosAngeles +# - Singapore + - SanFrancisco + test_citys: + - SanFrancisco + image_size: 256 + train: + batch_size: 12 + num_workers: 4 + val: + batch_size: ${..train.batch_size} + num_workers: ${.batch_size} + num_classes: + areas: 7 + ways: 10 + nodes: 33 + pixel_per_meter: 1 + crop_size_meters: 64 + max_init_error: 48 + add_map_mask: true + resize_image: 512 + pad_to_square: true + rectify_pitch: true + augmentation: + rot90: true + flip: true + image: + apply: True + brightness: 0.5 + contrast: 0.4 + saturation: 0.4 + hue": 0.5/3.14 +model: + image_size: ${data.image_size} + latent_dim: 128 + val_citys: ${data.val_citys} + image_encoder: + name: feature_extractor_v4 + architecture: PSP + backbone: + encoder: resnet50 +# pretrained: true + output_dim: 8 +# upsampling: 2 +# num_downsample: null +# remove_stride_from_first_conv: false + name: maplocnet + matching_dim: 8 + z_max: 32 + x_max: 32 + pixel_per_meter: 1 + num_scale_bins: 33 + num_rotations: 64 + map_encoder: + embedding_dim: 48 + output_dim: 8 + num_classes: + all: 50 + # ways: 10 + # nodes: 33 + backbone: + encoder: vgg19 + pretrained: false + output_scales: + - 0 + num_downsample: 3 + decoder: + - 128 + - 64 + - 64 + padding: replicate + unary_prior: false + bev_net: + num_blocks: 4 + latent_dim: 128 + output_dim: 8 + confidence: true +experiment: + name: maplocanet_602_hub_PSP + gpus: 2 + seed: 0 +training: + lr: 0.0001 + lr_scheduler: null + finetune_from_checkpoint: null + trainer: + val_check_interval: 1000 + log_every_n_steps: 100 +# limit_val_batches: 1000 + max_steps: 300000 + devices: ${experiment.gpus} + checkpointing: + monitor: "val/xy_recall_1m" + save_top_k: 5 + mode: max + +# filename: '{epoch}-{step}-{loss_SanFrancisco:.2f}' \ No newline at end of file diff --git a/conf/orienternet.yaml b/conf/orienternet.yaml new file mode 100644 index 0000000000000000000000000000000000000000..165294d6dd165b7e0dc5a814cd554fa5b0347b2e --- /dev/null +++ b/conf/orienternet.yaml @@ -0,0 +1,103 @@ +data: + root: '/home/ubuntu/media/MapLocNetDataset/UAV/' + train_citys: + - Paris + - Berlin + - London + - Tokyo + - NewYork + val_citys: +# - Taipei +# - LosAngeles +# - Singapore + - SanFrancisco + image_size: 256 + train: + batch_size: 12 + num_workers: 4 + val: + 
batch_size: ${..train.batch_size} + num_workers: ${.batch_size} + num_classes: + areas: 7 + ways: 10 + nodes: 33 + pixel_per_meter: 1 + crop_size_meters: 64 + max_init_error: 48 + add_map_mask: true + resize_image: 512 + pad_to_square: true + rectify_pitch: true + augmentation: + rot90: true +# flip: true + image: + apply: true + brightness: 0.5 + contrast: 0.4 + saturation: 0.4 + hue": 0.5/3.14 +model: + image_size: ${data.image_size} + latent_dim: 128 + val_citys: ${data.val_citys} + image_encoder: + name: feature_extractor_v2 + backbone: + encoder: resnet101 + pretrained: true + output_dim: 8 + num_downsample: null + remove_stride_from_first_conv: false + name: orienternet + matching_dim: 8 + z_max: 32 + x_max: 32 + pixel_per_meter: 1 + num_scale_bins: 33 + num_rotations: 64 + map_encoder: + embedding_dim: 16 + output_dim: 8 + num_classes: + areas: 7 + ways: 10 + nodes: 33 + backbone: + encoder: vgg19 + pretrained: false + output_scales: + - 0 + num_downsample: 3 + decoder: + - 128 + - 64 + - 64 + padding: replicate + unary_prior: false + bev_net: + num_blocks: 4 + latent_dim: 128 + output_dim: 8 + confidence: true +experiment: + name: OrienterNet_my_multi_city_debug_code_0815_2_monitor_metric + gpus: 4 + seed: 0 +training: + lr: 0.0001 + lr_scheduler: null + finetune_from_checkpoint: null + trainer: + val_check_interval: 1000 + log_every_n_steps: 100 +# limit_val_batches: 1000 + max_steps: 200000 + devices: ${experiment.gpus} + checkpointing: + monitor: "loss/total/val" + save_top_k: 10 + mode: min + +# filename: '{epoch}-{step}-{loss_SanFrancisco:.2f}' \ No newline at end of file diff --git a/dataset/UAV/dataset.py b/dataset/UAV/dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..1dbc8121bb355dcbee2ed7f2bc68e85e25b2b808 --- /dev/null +++ b/dataset/UAV/dataset.py @@ -0,0 +1,116 @@ +import torch +from torch.utils.data import Dataset +import os +import cv2 +# @Time : 2023-02-13 22:56 +# @Author : Wang Zhen +# @Email : frozenzhencola@163.com +# @File : SatelliteTool.py +# @Project : TGRS_seqmatch_2023_1 +import numpy as np +import random +from utils.geo import BoundaryBox, Projection +from osm.tiling import TileManager,MapTileManager +from pathlib import Path +from torchvision import transforms +from torch.utils.data import DataLoader + +class UavMapPair(Dataset): + def __init__( + self, + root: Path, + city:str, + training:bool, + transform + ): + super().__init__() + + # self.root = root + + # city = 'Manhattan' + # root = '/root/DATASET/CrossModel/' + # root=Path(root) + self.uav_image_path = root/city/'uav' + self.map_path = root/city/'map' + self.map_vis = root / city / 'map_vis' + info_path = root / city / 'info.csv' + + self.info = np.loadtxt(str(info_path), dtype=str, delimiter=",", skiprows=1) + + self.transform=transform + self.training=training + + def random_center_crop(self,image): + height, width = image.shape[:2] + + # 随机生成剪裁尺寸 + crop_size = random.randint(min(height, width) // 2, min(height, width)) + + # 计算剪裁的起始坐标 + start_x = (width - crop_size) // 2 + start_y = (height - crop_size) // 2 + + # 进行剪裁 + cropped_image = image[start_y:start_y + crop_size, start_x:start_x + crop_size] + + return cropped_image + def __getitem__(self, index: int): + id, uav_name, map_name, \ + uav_long, uav_lat, \ + map_long, map_lat, \ + tile_size_meters, pixel_per_meter, \ + u, v, yaw,dis=self.info[index] + + + uav_image=cv2.imread(str(self.uav_image_path/uav_name)) + if self.training: + uav_image =self.random_center_crop(uav_image) + 
uav_image=cv2.cvtColor(uav_image,cv2.COLOR_BGR2RGB) + if self.transform: + uav_image=self.transform(uav_image) + map=np.load(str(self.map_path/map_name)) + + return { + 'map':torch.from_numpy(np.ascontiguousarray(map)).long(), + 'image':torch.tensor(uav_image), + 'roll_pitch_yaw':torch.tensor((0, 0, float(yaw))).float(), + 'pixels_per_meter':torch.tensor(float(pixel_per_meter)).float(), + "uv":torch.tensor([float(u), float(v)]).float(), + } + def __len__(self): + return len(self.info) +if __name__ == '__main__': + + root=Path('/root/DATASET/OrienterNet/UavMap/') + city='NewYork' + + transform = transforms.Compose([ + transforms.ToTensor(), + transforms.Resize(256), + transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)) + ]) + + dataset=UavMapPair( + root=root, + city=city, + transform=transform + ) + datasetloder = DataLoader(dataset, batch_size=3) + for batch, i in enumerate(datasetloder): + pass + # 将PyTorch张量转换为PIL图像 + # pil_image = Image.fromarray(i['uav_image'][0].permute(1, 2, 0).byte().numpy()) + + # 显示图像 + # 将PyTorch张量转换为NumPy数组 + # numpy_array = i['uav_image'][0].numpy() + # + # # 显示图像 + # plt.imshow(numpy_array.transpose(1, 2, 0)) + # plt.axis('off') + # plt.show() + # + # map_viz, label = Colormap.apply(i['map'][0]) + # map_viz = map_viz * 255 + # map_viz = map_viz.astype(np.uint8) + # plot_images([map_viz], titles=["OpenStreetMap raster"]) diff --git a/dataset/__init__.py b/dataset/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..3b51bfd2ed3eaf38719d8ee102df779d53d1ffa4 --- /dev/null +++ b/dataset/__init__.py @@ -0,0 +1,4 @@ +# from .UAV.dataset import UavMapPair +from .dataset import UavMapDatasetModule + +# modules = {"UAV": UavMapPair} diff --git a/dataset/dataset.py b/dataset/dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..2c6a21153a9bab1e7ee3bbbafa302b8ace5fc30c --- /dev/null +++ b/dataset/dataset.py @@ -0,0 +1,109 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. 
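The cfg consumed by UavMapDatasetModule below comes from the conf/*.yaml files above, which lean on OmegaConf interpolation: ${..train.batch_size} and ${.batch_size} are relative references, while ${data.image_size} and ${experiment.gpus} are absolute paths. A minimal sketch of how the data.val block resolves; the inline YAML is a trimmed, hypothetical stand-in, not one of the files above:

from omegaconf import OmegaConf

# Trimmed stand-in for the data block shared by the configs above.
yaml_snippet = """
data:
  train: {batch_size: 12, num_workers: 4}
  val:
    batch_size: ${..train.batch_size}  # two dots step up to data -> data.train.batch_size
    num_workers: ${.batch_size}        # one dot stays in val -> data.val.batch_size
"""
cfg = OmegaConf.create(yaml_snippet)
print(OmegaConf.to_container(cfg, resolve=True)["data"]["val"])
# -> {'batch_size': 12, 'num_workers': 12}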
+ +from copy import deepcopy +from pathlib import Path +from typing import Any, Dict, List +# from logger import logger +import numpy as np +# import torch +# import torch.utils.data as torchdata +# import torchvision.transforms as tvf +from omegaconf import DictConfig, OmegaConf +import pytorch_lightning as pl +from dataset.UAV.dataset import UavMapPair +# from torch.utils.data import Dataset, DataLoader +# from torchvision import transforms +from torch.utils.data import Dataset, ConcatDataset +from torch.utils.data import Dataset, DataLoader, random_split +import torchvision.transforms as tvf + +# 自定义数据模块类,继承自pl.LightningDataModule +class UavMapDatasetModule(pl.LightningDataModule): + + + def __init__(self, cfg: Dict[str, Any]): + super().__init__() + + # default_cfg = OmegaConf.create(self.default_cfg) + # OmegaConf.set_struct(default_cfg, True) # cannot add new keys + # self.cfg = OmegaConf.merge(default_cfg, cfg) + self.cfg=cfg + # self.transform = tvf.Compose([ + # tvf.ToTensor(), + # tvf.Resize(self.cfg.image_size), + # tvf.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)) + # ]) + + tfs = [] + tfs.append(tvf.ToTensor()) + tfs.append(tvf.Resize(self.cfg.image_size)) + self.val_tfs = tvf.Compose(tfs) + + # transforms.Resize(self.cfg.image_size), + if cfg.augmentation.image.apply: + args = OmegaConf.masked_copy( + cfg.augmentation.image, ["brightness", "contrast", "saturation", "hue"] + ) + tfs.append(tvf.ColorJitter(**args)) + self.train_tfs = tvf.Compose(tfs) + + # self.train_tfs=self.transform + # self.val_tfs = self.transform + self.init() + def init(self): + self.train_dataset = ConcatDataset([ + UavMapPair(root=Path(self.cfg.root),city=city,training=False,transform=self.train_tfs) + for city in self.cfg.train_citys + ]) + + self.val_dataset = ConcatDataset([ + UavMapPair(root=Path(self.cfg.root),city=city,training=False,transform=self.val_tfs) + for city in self.cfg.val_citys + ]) + self.test_dataset = ConcatDataset([ + UavMapPair(root=Path(self.cfg.root),city=city,training=False,transform=self.val_tfs) + for city in self.cfg.test_citys + ]) + + # self.val_datasets = { + # city:UavMapPair(root=Path(self.cfg.root),city=city,transform=self.val_tfs) + # for city in self.cfg.val_citys + # } + # logger.info("train data len:{},val data len:{}".format(len(self.train_dataset),len(self.val_dataset))) + # # 定义分割比例 + # train_ratio = 0.8 # 训练集比例 + # # 计算分割的样本数量 + # train_size = int(len(self.dataset) * train_ratio) + # val_size = len(self.dataset) - train_size + # self.train_dataset, self.val_dataset = random_split(self.dataset, [train_size, val_size]) + def train_dataloader(self): + train_loader = DataLoader(self.train_dataset, + batch_size=self.cfg.train.batch_size, + num_workers=self.cfg.train.num_workers, + shuffle=True,pin_memory = True) + return train_loader + + def val_dataloader(self): + val_loader = DataLoader(self.val_dataset, + batch_size=self.cfg.val.batch_size, + num_workers=self.cfg.val.num_workers, + shuffle=True,pin_memory = True) + # + # my_dict = {k: v for k, v in self.val_datasets} + # val_loaders={city: DataLoader(dataset, + # batch_size=self.cfg.val.batch_size, + # num_workers=self.cfg.val.num_workers, + # shuffle=False,pin_memory = True) for city, dataset in self.val_datasets.items()} + return val_loader + def test_dataloader(self): + val_loader = DataLoader(self.test_dataset, + batch_size=self.cfg.val.batch_size, + num_workers=self.cfg.val.num_workers, + shuffle=True,pin_memory = True) + # + # my_dict = {k: v for k, v in self.val_datasets} + # val_loaders={city: 
DataLoader(dataset, + # batch_size=self.cfg.val.batch_size, + # num_workers=self.cfg.val.num_workers, + # shuffle=False,pin_memory = True) for city, dataset in self.val_datasets.items()} + return val_loader \ No newline at end of file diff --git a/dataset/image.py b/dataset/image.py new file mode 100644 index 0000000000000000000000000000000000000000..75b3dc68cc2481150c5ff938483ae640956bcf0d --- /dev/null +++ b/dataset/image.py @@ -0,0 +1,140 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. + +from typing import Callable, Optional, Union, Sequence + +import numpy as np +import torch +import torchvision.transforms.functional as tvf +import collections +from scipy.spatial.transform import Rotation + +from utils.geometry import from_homogeneous, to_homogeneous +from utils.wrappers import Camera + + +def rectify_image( + image: torch.Tensor, + cam: Camera, + roll: float, + pitch: Optional[float] = None, + valid: Optional[torch.Tensor] = None, +): + *_, h, w = image.shape + grid = torch.meshgrid( + [torch.arange(w, device=image.device), torch.arange(h, device=image.device)], + indexing="xy", + ) + grid = torch.stack(grid, -1).to(image.dtype) + + if pitch is not None: + args = ("ZX", (roll, pitch)) + else: + args = ("Z", roll) + R = Rotation.from_euler(*args, degrees=True).as_matrix() + R = torch.from_numpy(R).to(image) + + grid_rect = to_homogeneous(cam.normalize(grid)) @ R.T + grid_rect = cam.denormalize(from_homogeneous(grid_rect)) + grid_norm = (grid_rect + 0.5) / grid.new_tensor([w, h]) * 2 - 1 + rectified = torch.nn.functional.grid_sample( + image[None], + grid_norm[None], + align_corners=False, + mode="bilinear", + ).squeeze(0) + if valid is None: + valid = torch.all((grid_norm >= -1) & (grid_norm <= 1), -1) + else: + valid = ( + torch.nn.functional.grid_sample( + valid[None, None].float(), + grid_norm[None], + align_corners=False, + mode="nearest", + )[0, 0] + > 0 + ) + return rectified, valid + + +def resize_image( + image: torch.Tensor, + size: Union[int, Sequence, np.ndarray], + fn: Optional[Callable] = None, + camera: Optional[Camera] = None, + valid: np.ndarray = None, +): + """Resize an image to a fixed size, or according to max or min edge.""" + *_, h, w = image.shape + if fn is not None: + assert isinstance(size, int) + scale = size / fn(h, w) + h_new, w_new = int(round(h * scale)), int(round(w * scale)) + scale = (scale, scale) + else: + if isinstance(size, (collections.abc.Sequence, np.ndarray)): + w_new, h_new = size + elif isinstance(size, int): + w_new = h_new = size + else: + raise ValueError(f"Incorrect new size: {size}") + scale = (w_new / w, h_new / h) + if (w, h) != (w_new, h_new): + mode = tvf.InterpolationMode.BILINEAR + image = tvf.resize(image, (h_new, w_new), interpolation=mode, antialias=True) + image.clip_(0, 1) + if camera is not None: + camera = camera.scale(scale) + if valid is not None: + valid = tvf.resize( + valid.unsqueeze(0), + (h_new, w_new), + interpolation=tvf.InterpolationMode.NEAREST, + ).squeeze(0) + ret = [image, scale] + if camera is not None: + ret.append(camera) + if valid is not None: + ret.append(valid) + return ret + + +def pad_image( + image: torch.Tensor, + size: Union[int, Sequence, np.ndarray], + camera: Optional[Camera] = None, + valid: torch.Tensor = None, + crop_and_center: bool = False, +): + if isinstance(size, int): + w_new = h_new = size + elif isinstance(size, (collections.abc.Sequence, np.ndarray)): + w_new, h_new = size + else: + raise ValueError(f"Incorrect new size: {size}") + *c, h, w = image.shape + if 
crop_and_center: + diff = np.array([w - w_new, h - h_new]) + left, top = left_top = np.round(diff / 2).astype(int) + right, bottom = diff - left_top + else: + assert h <= h_new + assert w <= w_new + top = bottom = left = right = 0 + slice_out = np.s_[..., : min(h, h_new), : min(w, w_new)] + slice_in = np.s_[ + ..., max(top, 0) : h - max(bottom, 0), max(left, 0) : w - max(right, 0) + ] + if (w, h) == (w_new, h_new): + out = image + else: + out = torch.zeros((*c, h_new, w_new), dtype=image.dtype) + out[slice_out] = image[slice_in] + if camera is not None: + camera = camera.crop((max(left, 0), max(top, 0)), (w_new, h_new)) + out_valid = torch.zeros((h_new, w_new), dtype=torch.bool) + out_valid[slice_out] = True if valid is None else valid[slice_in] + if camera is not None: + return out, out_valid, camera + else: + return out, out_valid diff --git a/dataset/torch.py b/dataset/torch.py new file mode 100644 index 0000000000000000000000000000000000000000..9547ca149c606a345e5b8916591e43c26031022c --- /dev/null +++ b/dataset/torch.py @@ -0,0 +1,111 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. + +import collections +import os + +import torch +from torch.utils.data import get_worker_info +from torch.utils.data._utils.collate import ( + default_collate_err_msg_format, + np_str_obj_array_pattern, +) +from lightning_fabric.utilities.seed import pl_worker_init_function +from lightning_utilities.core.apply_func import apply_to_collection +from lightning_fabric.utilities.apply_func import move_data_to_device + + +def collate(batch): + """Difference with PyTorch default_collate: it can stack other tensor-like objects. + Adapted from PixLoc, Paul-Edouard Sarlin, ETH Zurich + https://github.com/cvg/pixloc + Released under the Apache License 2.0 + """ + if not isinstance(batch, list): # no batching + return batch + elem = batch[0] + elem_type = type(elem) + if isinstance(elem, torch.Tensor): + out = None + if torch.utils.data.get_worker_info() is not None: + # If we're in a background process, concatenate directly into a + # shared memory tensor to avoid an extra copy + numel = sum(x.numel() for x in batch) + storage = elem.storage()._new_shared(numel, device=elem.device) + out = elem.new(storage).resize_(len(batch), *list(elem.size())) + return torch.stack(batch, 0, out=out) + elif ( + elem_type.__module__ == "numpy" + and elem_type.__name__ != "str_" + and elem_type.__name__ != "string_" + ): + if elem_type.__name__ == "ndarray" or elem_type.__name__ == "memmap": + # array of string classes and object + if np_str_obj_array_pattern.search(elem.dtype.str) is not None: + raise TypeError(default_collate_err_msg_format.format(elem.dtype)) + + return collate([torch.as_tensor(b) for b in batch]) + elif elem.shape == (): # scalars + return torch.as_tensor(batch) + elif isinstance(elem, float): + return torch.tensor(batch, dtype=torch.float64) + elif isinstance(elem, int): + return torch.tensor(batch) + elif isinstance(elem, (str, bytes)): + return batch + elif isinstance(elem, collections.abc.Mapping): + return {key: collate([d[key] for d in batch]) for key in elem} + elif isinstance(elem, tuple) and hasattr(elem, "_fields"): # namedtuple + return elem_type(*(collate(samples) for samples in zip(*batch))) + elif isinstance(elem, collections.abc.Sequence): + # check to make sure that the elements in batch have consistent size + it = iter(batch) + elem_size = len(next(it)) + if not all(len(elem) == elem_size for elem in it): + raise RuntimeError("each element in list of batch should be of equal size") + 
transposed = zip(*batch) + return [collate(samples) for samples in transposed] + else: + # try to stack anyway in case the object implements stacking. + try: + return torch.stack(batch, 0) + except TypeError as e: + if "expected Tensor as element" in str(e): + return batch + else: + raise e + + +def set_num_threads(nt): + """Force numpy and other libraries to use a limited number of threads.""" + try: + import mkl + except ImportError: + pass + else: + mkl.set_num_threads(nt) + torch.set_num_threads(1) + os.environ["IPC_ENABLE"] = "1" + for o in [ + "OPENBLAS_NUM_THREADS", + "NUMEXPR_NUM_THREADS", + "OMP_NUM_THREADS", + "MKL_NUM_THREADS", + ]: + os.environ[o] = str(nt) + + +def worker_init_fn(i): + info = get_worker_info() + pl_worker_init_function(info.id) + num_threads = info.dataset.cfg.get("num_threads") + if num_threads is not None: + set_num_threads(num_threads) + + +def unbatch_to_device(data, device="cpu"): + data = move_data_to_device(data, device) + data = apply_to_collection(data, torch.Tensor, lambda x: x.squeeze(0)) + data = apply_to_collection( + data, list, lambda x: x[0] if len(x) == 1 and isinstance(x[0], str) else x + ) + return data diff --git a/evaluation/kitti.py b/evaluation/kitti.py new file mode 100644 index 0000000000000000000000000000000000000000..e91da069f307a533b0471a3fb43f8622cadc60db --- /dev/null +++ b/evaluation/kitti.py @@ -0,0 +1,89 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. + +import argparse +from pathlib import Path +from typing import Optional, Tuple + +from omegaconf import OmegaConf, DictConfig + +from .. import logger +from ..data import KittiDataModule +from .run import evaluate + + +default_cfg_single = OmegaConf.create({}) +# For the sequential evaluation, we need to center the map around the GT location, +# since random offsets would accumulate and leave only the GT location with a valid mask. +# This should not have much impact on the results. +default_cfg_sequential = OmegaConf.create( + { + "data": { + "mask_radius": KittiDataModule.default_cfg["max_init_error"], + "prior_range_rotation": KittiDataModule.default_cfg[ + "max_init_error_rotation" + ] + + 1, + "max_init_error": 0, + "max_init_error_rotation": 0, + }, + "chunking": { + "max_length": 100, # about 10s? 
+ }, + } +) + + +def run( + split: str, + experiment: str, + cfg: Optional[DictConfig] = None, + sequential: bool = False, + thresholds: Tuple[int] = (1, 3, 5), + **kwargs, +): + cfg = cfg or {} + if isinstance(cfg, dict): + cfg = OmegaConf.create(cfg) + default = default_cfg_sequential if sequential else default_cfg_single + cfg = OmegaConf.merge(default, cfg) + dataset = KittiDataModule(cfg.get("data", {})) + + metrics = evaluate( + experiment, + cfg, + dataset, + split=split, + sequential=sequential, + viz_kwargs=dict(show_dir_error=True, show_masked_prob=False), + **kwargs, + ) + + keys = ["directional_error", "yaw_max_error"] + if sequential: + keys += ["directional_seq_error", "yaw_seq_error"] + for k in keys: + rec = metrics[k].recall(thresholds).double().numpy().round(2).tolist() + logger.info("Recall %s: %s at %s m/°", k, rec, thresholds) + return metrics + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--experiment", type=str, required=True) + parser.add_argument( + "--split", type=str, default="test", choices=["test", "val", "train"] + ) + parser.add_argument("--sequential", action="store_true") + parser.add_argument("--output_dir", type=Path) + parser.add_argument("--num", type=int) + parser.add_argument("dotlist", nargs="*") + args = parser.parse_args() + cfg = OmegaConf.from_cli(args.dotlist) + run( + args.split, + args.experiment, + cfg, + args.sequential, + output_dir=args.output_dir, + num=args.num, + ) diff --git a/evaluation/mapillary.py b/evaluation/mapillary.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/evaluation/run.py b/evaluation/run.py new file mode 100644 index 0000000000000000000000000000000000000000..6e0697ecd367f7b2d9c14ba164914ee7a7812f71 --- /dev/null +++ b/evaluation/run.py @@ -0,0 +1,252 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. 
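The argparse block in evaluation/kitti.py above forwards any trailing dotlist arguments through OmegaConf.from_cli, so config overrides can be passed straight on the command line; programmatically, the same entry point is just the run() function. A sketch of such a call, assuming kitti.py's relative imports resolve (i.e. the evaluation package sits inside a top-level package, here hypothetically called maploc) and using the OrienterNet_MGL alias registered in run.py below:

from omegaconf import OmegaConf
from maploc.evaluation.kitti import run  # "maploc" is a placeholder for the actual top-level package

# Single-image evaluation on the KITTI test split with one config override;
# extra keyword arguments such as num are forwarded to evaluate_single_image.
metrics = run(
    split="test",
    experiment="OrienterNet_MGL",  # pretrained alias resolved by resolve_checkpoint_path in run.py
    cfg=OmegaConf.create({"data": {"max_init_error": 48}}),
    sequential=False,
    num=100,
)
print(metrics["directional_error"].recall((1, 3, 5)))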
+ +import functools +from itertools import islice +from typing import Callable, Dict, Optional, Tuple +from pathlib import Path + +import numpy as np +import torch +from omegaconf import DictConfig, OmegaConf +from torchmetrics import MetricCollection +from pytorch_lightning import seed_everything +from tqdm import tqdm + +from logger import logger, EXPERIMENTS_PATH +from dataset.torch import collate, unbatch_to_device +from models.voting import argmax_xyr, fuse_gps +from models.metrics import AngleError, LateralLongitudinalError, Location2DError +from models.sequential import GPSAligner, RigidAligner +from module import GenericModule +from utils.io import download_file, DATA_URL +from evaluation.viz import plot_example_single, plot_example_sequential +from evaluation.utils import write_dump + + +pretrained_models = dict( + OrienterNet_MGL=("orienternet_mgl.ckpt", dict(num_rotations=256)), +) + + +def resolve_checkpoint_path(experiment_or_path: str) -> Path: + path = Path(experiment_or_path) + if not path.exists(): + # provided name of experiment + path = Path(EXPERIMENTS_PATH, *experiment_or_path.split("/")) + if not path.exists(): + if experiment_or_path in set(p for p, _ in pretrained_models.values()): + download_file(f"{DATA_URL}/{experiment_or_path}", path) + else: + raise FileNotFoundError(path) + if path.is_file(): + return path + # provided only the experiment name + maybe_path = path / "last-step-v1.ckpt" + if not maybe_path.exists(): + maybe_path = path / "last.ckpt" + if not maybe_path.exists(): + raise FileNotFoundError(f"Could not find any checkpoint in {path}.") + return maybe_path + + +@torch.no_grad() +def evaluate_single_image( + dataloader: torch.utils.data.DataLoader, + model: GenericModule, + num: Optional[int] = None, + callback: Optional[Callable] = None, + progress: bool = True, + mask_index: Optional[Tuple[int]] = None, + has_gps: bool = False, +): + ppm = model.model.conf.pixel_per_meter + metrics = MetricCollection(model.model.metrics()) + metrics["directional_error"] = LateralLongitudinalError(ppm) + if has_gps: + metrics["xy_gps_error"] = Location2DError("uv_gps", ppm) + metrics["xy_fused_error"] = Location2DError("uv_fused", ppm) + metrics["yaw_fused_error"] = AngleError("yaw_fused") + metrics = metrics.to(model.device) + + for i, batch_ in enumerate( + islice(tqdm(dataloader, total=num, disable=not progress), num) + ): + batch = model.transfer_batch_to_device(batch_, model.device, i) + # Ablation: mask semantic classes + if mask_index is not None: + mask = batch["map"][0, mask_index[0]] == (mask_index[1] + 1) + batch["map"][0, mask_index[0]][mask] = 0 + pred = model(batch) + + if has_gps: + (uv_gps,) = pred["uv_gps"] = batch["uv_gps"] + pred["log_probs_fused"] = fuse_gps( + pred["log_probs"], uv_gps, ppm, sigma=batch["accuracy_gps"] + ) + uvt_fused = argmax_xyr(pred["log_probs_fused"]) + pred["uv_fused"] = uvt_fused[..., :2] + pred["yaw_fused"] = uvt_fused[..., -1] + del uv_gps, uvt_fused + + results = metrics(pred, batch) + if callback is not None: + callback( + i, model, unbatch_to_device(pred), unbatch_to_device(batch_), results + ) + del batch_, batch, pred, results + + return metrics.cpu() + + +@torch.no_grad() +def evaluate_sequential( + dataset: torch.utils.data.Dataset, + chunk2idx: Dict, + model: GenericModule, + num: Optional[int] = None, + shuffle: bool = False, + callback: Optional[Callable] = None, + progress: bool = True, + num_rotations: int = 512, + mask_index: Optional[Tuple[int]] = None, + has_gps: bool = True, +): + chunk_keys = 
list(chunk2idx) + if shuffle: + chunk_keys = [chunk_keys[i] for i in torch.randperm(len(chunk_keys))] + if num is not None: + chunk_keys = chunk_keys[:num] + lengths = [len(chunk2idx[k]) for k in chunk_keys] + logger.info( + "Min/max/med lengths: %d/%d/%d, total number of images: %d", + min(lengths), + np.median(lengths), + max(lengths), + sum(lengths), + ) + viz = callback is not None + + metrics = MetricCollection(model.model.metrics()) + ppm = model.model.conf.pixel_per_meter + metrics["directional_error"] = LateralLongitudinalError(ppm) + metrics["xy_seq_error"] = Location2DError("uv_seq", ppm) + metrics["yaw_seq_error"] = AngleError("yaw_seq") + metrics["directional_seq_error"] = LateralLongitudinalError(ppm, key="uv_seq") + if has_gps: + metrics["xy_gps_error"] = Location2DError("uv_gps", ppm) + metrics["xy_gps_seq_error"] = Location2DError("uv_gps_seq", ppm) + metrics["yaw_gps_seq_error"] = AngleError("yaw_gps_seq") + metrics = metrics.to(model.device) + + keys_save = ["uvr_max", "uv_max", "yaw_max", "uv_expectation"] + if has_gps: + keys_save.append("uv_gps") + if viz: + keys_save.append("log_probs") + + for chunk_index, key in enumerate(tqdm(chunk_keys, disable=not progress)): + indices = chunk2idx[key] + aligner = RigidAligner(track_priors=viz, num_rotations=num_rotations) + if has_gps: + aligner_gps = GPSAligner(track_priors=viz, num_rotations=num_rotations) + batches = [] + preds = [] + for i in indices: + data = dataset[i] + data = model.transfer_batch_to_device(data, model.device, 0) + pred = model(collate([data])) + + canvas = data["canvas"] + data["xy_geo"] = xy = canvas.to_xy(data["uv"].double()) + data["yaw"] = yaw = data["roll_pitch_yaw"][-1].double() + aligner.update(pred["log_probs"][0], canvas, xy, yaw) + + if has_gps: + (uv_gps) = pred["uv_gps"] = data["uv_gps"][None] + xy_gps = canvas.to_xy(uv_gps.double()) + aligner_gps.update(xy_gps, data["accuracy_gps"], canvas, xy, yaw) + + if not viz: + data.pop("image") + data.pop("map") + batches.append(data) + preds.append({k: pred[k][0] for k in keys_save}) + del pred + + xy_gt = torch.stack([b["xy_geo"] for b in batches]) + yaw_gt = torch.stack([b["yaw"] for b in batches]) + aligner.compute() + xy_seq, yaw_seq = aligner.transform(xy_gt, yaw_gt) + if has_gps: + aligner_gps.compute() + xy_gps_seq, yaw_gps_seq = aligner_gps.transform(xy_gt, yaw_gt) + results = [] + for i in range(len(indices)): + preds[i]["uv_seq"] = batches[i]["canvas"].to_uv(xy_seq[i]).float() + preds[i]["yaw_seq"] = yaw_seq[i].float() + if has_gps: + preds[i]["uv_gps_seq"] = ( + batches[i]["canvas"].to_uv(xy_gps_seq[i]).float() + ) + preds[i]["yaw_gps_seq"] = yaw_gps_seq[i].float() + results.append(metrics(preds[i], batches[i])) + if viz: + callback(chunk_index, model, batches, preds, results, aligner) + del aligner, preds, batches, results + return metrics.cpu() + + +def evaluate( + experiment: str, + cfg: DictConfig, + dataset, + split: str, + sequential: bool = False, + output_dir: Optional[Path] = None, + callback: Optional[Callable] = None, + num_workers: int = 1, + viz_kwargs=None, + **kwargs, +): + if experiment in pretrained_models: + experiment, cfg_override = pretrained_models[experiment] + cfg = OmegaConf.merge(OmegaConf.create(dict(model=cfg_override)), cfg) + + logger.info("Evaluating model %s with config %s", experiment, cfg) + checkpoint_path = resolve_checkpoint_path(experiment) + model = GenericModule.load_from_checkpoint( + checkpoint_path, cfg=cfg, find_best=not experiment.endswith(".ckpt") + ) + model = model.eval() + if 
torch.cuda.is_available(): + model = model.cuda() + + dataset.prepare_data() + dataset.setup() + + if output_dir is not None: + output_dir.mkdir(exist_ok=True, parents=True) + if callback is None: + if sequential: + callback = plot_example_sequential + else: + callback = plot_example_single + callback = functools.partial( + callback, out_dir=output_dir, **(viz_kwargs or {}) + ) + kwargs = {**kwargs, "callback": callback} + + seed_everything(dataset.cfg.seed) + if sequential: + dset, chunk2idx = dataset.sequence_dataset(split, **cfg.chunking) + metrics = evaluate_sequential(dset, chunk2idx, model, **kwargs) + else: + loader = dataset.dataloader(split, shuffle=True, num_workers=num_workers) + metrics = evaluate_single_image(loader, model, **kwargs) + + results = metrics.compute() + logger.info("All results: %s", results) + if output_dir is not None: + write_dump(output_dir, experiment, cfg, results, metrics) + logger.info("Outputs have been written to %s.", output_dir) + return metrics diff --git a/evaluation/utils.py b/evaluation/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..6cc954ed20557c351965cad89aa2e249986986ee --- /dev/null +++ b/evaluation/utils.py @@ -0,0 +1,40 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. + +import numpy as np +from omegaconf import OmegaConf + +from utils.io import write_json + + +def compute_recall(errors): + num_elements = len(errors) + sort_idx = np.argsort(errors) + errors = np.array(errors.copy())[sort_idx] + recall = (np.arange(num_elements) + 1) / num_elements + recall = np.r_[0, recall] + errors = np.r_[0, errors] + return errors, recall + + +def compute_auc(errors, recall, thresholds): + aucs = [] + for t in thresholds: + last_index = np.searchsorted(errors, t, side="right") + r = np.r_[recall[:last_index], recall[last_index - 1]] + e = np.r_[errors[:last_index], t] + auc = np.trapz(r, x=e) / t + aucs.append(auc * 100) + return aucs + + +def write_dump(output_dir, experiment, cfg, results, metrics): + dump = { + "experiment": experiment, + "cfg": OmegaConf.to_container(cfg), + "results": results, + "errors": {}, + } + for k, m in metrics.items(): + if hasattr(m, "get_errors"): + dump["errors"][k] = m.get_errors().numpy() + write_json(output_dir / "log.json", dump) diff --git a/evaluation/viz.py b/evaluation/viz.py new file mode 100644 index 0000000000000000000000000000000000000000..9cd9f7dfd0f2103f2ebdda8cfe8022ad5a2e719b --- /dev/null +++ b/evaluation/viz.py @@ -0,0 +1,178 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. 
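
The recall/AUC helpers added in `evaluation/utils.py` above are easiest to see with a small end-to-end sketch. The snippet below is an editor's illustration, not part of the diff: the error values are invented, and it assumes only the two functions defined above (`compute_recall`, `compute_auc`) plus NumPy.

```python
import numpy as np

from evaluation.utils import compute_recall, compute_auc

# Hypothetical per-image localization errors in meters.
xy_errors = np.array([0.8, 1.7, 2.3, 4.9, 7.5, 12.0, 45.0])

# Sort the errors and build the cumulative recall curve (both arrays are
# prefixed with 0 by compute_recall).
errors, recall = compute_recall(xy_errors)

# Normalized area under the recall-vs-error curve up to each threshold, in percent.
thresholds = [1.0, 3.0, 5.0]
aucs = compute_auc(errors, recall, thresholds)
print(dict(zip(thresholds, (round(a, 1) for a in aucs))))
```

This is presumably how the per-metric error arrays dumped by `write_dump` are meant to be summarized into recall-AUC numbers; the thresholds would be chosen per metric (meters for position, degrees for yaw).
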
+ +import numpy as np +import torch +import matplotlib.pyplot as plt + +from utils.io import write_torch_image +from utils.viz_2d import plot_images, features_to_RGB, save_plot +from utils.viz_localization import ( + likelihood_overlay, + plot_pose, + plot_dense_rotations, + add_circle_inset, +) +from osm.viz import Colormap, plot_nodes + + +def plot_example_single( + idx, + model, + pred, + data, + results, + plot_bev=True, + out_dir=None, + fig_for_paper=False, + show_gps=False, + show_fused=False, + show_dir_error=False, + show_masked_prob=False, +): + scene, name, rasters, uv_gt = (data[k] for k in ("scene", "name", "map", "uv")) + uv_gps = data.get("uv_gps") + yaw_gt = data["roll_pitch_yaw"][-1].numpy() + image = data["image"].permute(1, 2, 0) + if "valid" in data: + image = image.masked_fill(~data["valid"].unsqueeze(-1), 0.3) + + lp_uvt = lp_uv = pred["log_probs"] + if show_fused and "log_probs_fused" in pred: + lp_uvt = lp_uv = pred["log_probs_fused"] + elif not show_masked_prob and "scores_unmasked" in pred: + lp_uvt = lp_uv = pred["scores_unmasked"] + has_rotation = lp_uvt.ndim == 3 + if has_rotation: + lp_uv = lp_uvt.max(-1).values + if lp_uv.min() > -np.inf: + lp_uv = lp_uv.clip(min=np.percentile(lp_uv, 1)) + prob = lp_uv.exp() + uv_p, yaw_p = pred["uv_max"], pred.get("yaw_max") + if show_fused and "uv_fused" in pred: + uv_p, yaw_p = pred["uv_fused"], pred.get("yaw_fused") + feats_map = pred["map"]["map_features"][0] + (feats_map_rgb,) = features_to_RGB(feats_map.numpy()) + + text1 = rf'$\Delta xy$: {results["xy_max_error"]:.1f}m' + if has_rotation: + text1 += rf', $\Delta\theta$: {results["yaw_max_error"]:.1f}°' + if show_fused and "xy_fused_error" in results: + text1 += rf', $\Delta xy_{{fused}}$: {results["xy_fused_error"]:.1f}m' + text1 += rf', $\Delta\theta_{{fused}}$: {results["yaw_fused_error"]:.1f}°' + if show_dir_error and "directional_error" in results: + err_lat, err_lon = results["directional_error"] + text1 += rf", $\Delta$lateral/longitundinal={err_lat:.1f}m/{err_lon:.1f}m" + if "xy_gps_error" in results: + text1 += rf', $\Delta xy_{{GPS}}$: {results["xy_gps_error"]:.1f}m' + + map_viz = Colormap.apply(rasters) + overlay = likelihood_overlay(prob.numpy(), map_viz.mean(-1, keepdims=True)) + plot_images( + [image, map_viz, overlay, feats_map_rgb], + titles=[text1, "map", "likelihood", "neural map"], + dpi=75, + cmaps="jet", + ) + fig = plt.gcf() + axes = fig.axes + axes[1].images[0].set_interpolation("none") + axes[2].images[0].set_interpolation("none") + Colormap.add_colorbar() + plot_nodes(1, rasters[2]) + + if show_gps and uv_gps is not None: + plot_pose([1], uv_gps, c="blue") + plot_pose([1], uv_gt, yaw_gt, c="red") + plot_pose([1], uv_p, yaw_p, c="k") + plot_dense_rotations(2, lp_uvt.exp()) + inset_center = pred["uv_max"] if results["xy_max_error"] < 5 else uv_gt + axins = add_circle_inset(axes[2], inset_center) + axins.scatter(*uv_gt, lw=1, c="red", ec="k", s=50, zorder=15) + axes[0].text( + 0.003, + 0.003, + f"{scene}/{name}", + transform=axes[0].transAxes, + fontsize=3, + va="bottom", + ha="left", + color="w", + ) + plt.show() + if out_dir is not None: + name_ = name.replace("/", "_") + p = str(out_dir / f"{scene}_{name_}_{{}}.pdf") + save_plot(p.format("pred")) + plt.close() + + if fig_for_paper: + # !cp ../datasets/MGL/{scene}/images/{name}.jpg {out_dir}/{scene}_{name}.jpg + plot_images([map_viz]) + plt.gca().images[0].set_interpolation("none") + plot_nodes(0, rasters[2]) + plot_pose([0], uv_gt, yaw_gt, c="red") + plot_pose([0], pred["uv_max"], 
pred["yaw_max"], c="k") + save_plot(p.format("map")) + plt.close() + plot_images([lp_uv], cmaps="jet") + plot_dense_rotations(0, lp_uvt.exp()) + save_plot(p.format("loglikelihood"), dpi=100) + plt.close() + plot_images([overlay]) + plt.gca().images[0].set_interpolation("none") + axins = add_circle_inset(plt.gca(), inset_center) + axins.scatter(*uv_gt, lw=1, c="red", ec="k", s=50) + save_plot(p.format("likelihood")) + plt.close() + write_torch_image( + p.format("neuralmap").replace("pdf", "jpg"), feats_map_rgb + ) + write_torch_image(p.format("image").replace("pdf", "jpg"), image.numpy()) + + if not plot_bev: + return + + feats_q = pred["features_bev"] + mask_bev = pred["valid_bev"] + prior = None + if "log_prior" in pred["map"]: + prior = pred["map"]["log_prior"][0].sigmoid() + if "bev" in pred and "confidence" in pred["bev"]: + conf_q = pred["bev"]["confidence"] + else: + conf_q = torch.norm(feats_q, dim=0) + conf_q = conf_q.masked_fill(~mask_bev, np.nan) + (feats_q_rgb,) = features_to_RGB(feats_q.numpy(), masks=[mask_bev.numpy()]) + # feats_map_rgb, feats_q_rgb, = features_to_RGB( + # feats_map.numpy(), feats_q.numpy(), masks=[None, mask_bev]) + norm_map = torch.norm(feats_map, dim=0) + + plot_images( + [conf_q, feats_q_rgb, norm_map] + ([] if prior is None else [prior]), + titles=["BEV confidence", "BEV features", "map norm"] + + ([] if prior is None else ["map prior"]), + dpi=50, + cmaps="jet", + ) + plt.show() + + if out_dir is not None: + save_plot(p.format("bev")) + plt.close() + + +def plot_example_sequential( + idx, + model, + pred, + data, + results, + plot_bev=True, + out_dir=None, + fig_for_paper=False, + show_gps=False, + show_fused=False, + show_dir_error=False, + show_masked_prob=False, +): + return diff --git a/feature_extractor_models/__init__.py b/feature_extractor_models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..3420f4a51d4d7684b94d5782a97f2902b6c20897 --- /dev/null +++ b/feature_extractor_models/__init__.py @@ -0,0 +1,82 @@ + +from . import encoders +from . import decoders + + +from .decoders.unet import Unet +from .decoders.unetplusplus import UnetPlusPlus +from .decoders.manet import MAnet +from .decoders.linknet import Linknet +from .decoders.fpn import FPN +from .decoders.lightfpn import LightFPN +from .decoders.pspnet import PSPNet +from .decoders.deeplabv3 import DeepLabV3, DeepLabV3Plus +from .decoders.pan import PAN +from .base.hub_mixin import from_pretrained + +from .__version__ import __version__ + +# some private imports for create_model function +from typing import Optional as _Optional +import torch as _torch + + +def create_model( + arch: str, + encoder_name: str = "resnet34", + encoder_weights: _Optional[str] = "imagenet", + in_channels: int = 3, + classes: int = 1, + **kwargs, +) -> _torch.nn.Module: + """Models entrypoint, allows to create any model architecture just with + parameters, without using its class + """ + + archs = [ + Unet, + UnetPlusPlus, + MAnet, + Linknet, + FPN, + LightFPN, + PSPNet, + DeepLabV3, + DeepLabV3Plus, + PAN, + ] + archs_dict = {a.__name__.lower(): a for a in archs} + try: + model_class = archs_dict[arch.lower()] + except KeyError: + raise KeyError( + "Wrong architecture type `{}`. 
Available options are: {}".format( + arch, list(archs_dict.keys()) + ) + ) + return model_class( + encoder_name=encoder_name, + encoder_weights=encoder_weights, + in_channels=in_channels, + classes=classes, + **kwargs, + ) + + +__all__ = [ + "encoders", + "decoders", + "Unet", + "UnetPlusPlus", + "MAnet", + "Linknet", + "FPN", + "LightFPN", + "PSPNet", + "DeepLabV3", + "DeepLabV3Plus", + "PAN", + "from_pretrained", + "create_model", + "__version__", +] diff --git a/feature_extractor_models/__version__.py b/feature_extractor_models/__version__.py new file mode 100644 index 0000000000000000000000000000000000000000..8ced1f115c071a33685f437ee864588488616059 --- /dev/null +++ b/feature_extractor_models/__version__.py @@ -0,0 +1,3 @@ +VERSION = (0, 3, "4dev0") + +__version__ = ".".join(map(str, VERSION)) diff --git a/feature_extractor_models/base/__init__.py b/feature_extractor_models/base/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..0df076d9802cc9de24092a20b729f53042285459 --- /dev/null +++ b/feature_extractor_models/base/__init__.py @@ -0,0 +1,13 @@ +from .model import SegmentationModel + +from .modules import Conv2dReLU, Attention + +from .heads import SegmentationHead, ClassificationHead + +__all__ = [ + "SegmentationModel", + "Conv2dReLU", + "Attention", + "SegmentationHead", + "ClassificationHead", +] diff --git a/feature_extractor_models/base/heads.py b/feature_extractor_models/base/heads.py new file mode 100644 index 0000000000000000000000000000000000000000..fbc939cad427fb6b9edfb02c0e841a68de84a03a --- /dev/null +++ b/feature_extractor_models/base/heads.py @@ -0,0 +1,34 @@ +import torch.nn as nn +from .modules import Activation + + +class SegmentationHead(nn.Sequential): + def __init__( + self, in_channels, out_channels, kernel_size=3, activation=None, upsampling=1 + ): + conv2d = nn.Conv2d( + in_channels, out_channels, kernel_size=kernel_size, padding=kernel_size // 2 + ) + upsampling = ( + nn.UpsamplingBilinear2d(scale_factor=upsampling) + if upsampling > 1 + else nn.Identity() + ) + activation = Activation(activation) + super().__init__(conv2d, upsampling, activation) + + +class ClassificationHead(nn.Sequential): + def __init__( + self, in_channels, classes, pooling="avg", dropout=0.2, activation=None + ): + if pooling not in ("max", "avg"): + raise ValueError( + "Pooling should be one of ('max', 'avg'), got {}.".format(pooling) + ) + pool = nn.AdaptiveAvgPool2d(1) if pooling == "avg" else nn.AdaptiveMaxPool2d(1) + flatten = nn.Flatten() + dropout = nn.Dropout(p=dropout, inplace=True) if dropout else nn.Identity() + linear = nn.Linear(in_channels, classes, bias=True) + activation = Activation(activation) + super().__init__(pool, flatten, dropout, linear, activation) diff --git a/feature_extractor_models/base/hub_mixin.py b/feature_extractor_models/base/hub_mixin.py new file mode 100644 index 0000000000000000000000000000000000000000..2f1056ea091c9dc3b774d8381d72d5fdb9eb74ea --- /dev/null +++ b/feature_extractor_models/base/hub_mixin.py @@ -0,0 +1,154 @@ +import json +from pathlib import Path +from typing import Optional, Union +from functools import wraps +from huggingface_hub import ( + PyTorchModelHubMixin, + ModelCard, + ModelCardData, + hf_hub_download, +) + + +MODEL_CARD = """ +--- +{{ card_data }} +--- +# {{ model_name }} Model Card + +Table of Contents: +- [Load trained model](#load-trained-model) +- [Model init parameters](#model-init-parameters) +- [Model metrics](#model-metrics) +- [Dataset](#dataset) + +## Load trained model +```python 
+import feature_extractor_models as smp + +model = smp.{{ model_name }}.from_pretrained("{{ save_directory | default("", true)}}") +``` + +## Model init parameters +```python +model_init_params = {{ model_parameters }} +``` + +## Model metrics +{{ metrics | default("[More Information Needed]", true) }} + +## Dataset +Dataset name: {{ dataset | default("[More Information Needed]", true) }} + +## More Information +- Library: {{ repo_url | default("[More Information Needed]", true) }} +- Docs: {{ docs_url | default("[More Information Needed]", true) }} + +This model has been pushed to the Hub using the [PytorchModelHubMixin](https://huggingface.co/docs/huggingface_hub/package_reference/mixins#huggingface_hub.PyTorchModelHubMixin) +""" + + +def _format_parameters(parameters: dict): + params = {k: v for k, v in parameters.items() if not k.startswith("_")} + params = [ + f'"{k}": {v}' if not isinstance(v, str) else f'"{k}": "{v}"' + for k, v in params.items() + ] + params = ",\n".join([f" {param}" for param in params]) + params = "{\n" + f"{params}" + "\n}" + return params + + +class SMPHubMixin(PyTorchModelHubMixin): + def generate_model_card(self, *args, **kwargs) -> ModelCard: + model_parameters_json = _format_parameters(self._hub_mixin_config) + directory = self._save_directory if hasattr(self, "_save_directory") else None + repo_id = self._repo_id if hasattr(self, "_repo_id") else None + repo_or_directory = repo_id if repo_id is not None else directory + + metrics = self._metrics if hasattr(self, "_metrics") else None + dataset = self._dataset if hasattr(self, "_dataset") else None + + if metrics is not None: + metrics = json.dumps(metrics, indent=4) + metrics = f"```json\n{metrics}\n```" + + model_card_data = ModelCardData( + languages=["python"], + library_name="segmentation-models-pytorch", + license="mit", + tags=["semantic-segmentation", "pytorch", "segmentation-models-pytorch"], + pipeline_tag="image-segmentation", + ) + model_card = ModelCard.from_template( + card_data=model_card_data, + template_str=MODEL_CARD, + repo_url="https://github.com/qubvel/segmentation_models.pytorch", + docs_url="https://smp.readthedocs.io/en/latest/", + model_parameters=model_parameters_json, + save_directory=repo_or_directory, + model_name=self.__class__.__name__, + metrics=metrics, + dataset=dataset, + ) + return model_card + + def _set_attrs_from_kwargs(self, attrs, kwargs): + for attr in attrs: + if attr in kwargs: + setattr(self, f"_{attr}", kwargs.pop(attr)) + + def _del_attrs(self, attrs): + for attr in attrs: + if hasattr(self, f"_{attr}"): + delattr(self, f"_{attr}") + + @wraps(PyTorchModelHubMixin.save_pretrained) + def save_pretrained( + self, save_directory: Union[str, Path], *args, **kwargs + ) -> Optional[str]: + # set additional attributes to be used in generate_model_card + self._save_directory = save_directory + self._set_attrs_from_kwargs(["metrics", "dataset"], kwargs) + + # set additional attribute to be used in from_pretrained + self._hub_mixin_config["_model_class"] = self.__class__.__name__ + + try: + # call the original save_pretrained + result = super().save_pretrained(save_directory, *args, **kwargs) + finally: + # delete the additional attributes + self._del_attrs(["save_directory", "metrics", "dataset"]) + self._hub_mixin_config.pop("_model_class") + + return result + + @wraps(PyTorchModelHubMixin.push_to_hub) + def push_to_hub(self, repo_id: str, *args, **kwargs): + self._repo_id = repo_id + self._set_attrs_from_kwargs(["metrics", "dataset"], kwargs) + result 
= super().push_to_hub(repo_id, *args, **kwargs) + self._del_attrs(["repo_id", "metrics", "dataset"]) + return result + + @property + def config(self): + return self._hub_mixin_config + + +@wraps(PyTorchModelHubMixin.from_pretrained) +def from_pretrained(pretrained_model_name_or_path: str, *args, **kwargs): + config_path = hf_hub_download( + pretrained_model_name_or_path, + filename="config.json", + revision=kwargs.get("revision", None), + ) + with open(config_path, "r") as f: + config = json.load(f) + model_class_name = config.pop("_model_class") + + import feature_extractor_models as smp + + model_class = getattr(smp, model_class_name) + return model_class.from_pretrained(pretrained_model_name_or_path, *args, **kwargs) diff --git a/feature_extractor_models/base/initialization.py b/feature_extractor_models/base/initialization.py new file mode 100644 index 0000000000000000000000000000000000000000..4bea4aa6aebf19ffbe1ccc0690566d01f91565ef --- /dev/null +++ b/feature_extractor_models/base/initialization.py @@ -0,0 +1,26 @@ +import torch.nn as nn + + +def initialize_decoder(module): + for m in module.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_uniform_(m.weight, mode="fan_in", nonlinearity="relu") + if m.bias is not None: + nn.init.constant_(m.bias, 0) + + elif isinstance(m, nn.BatchNorm2d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + + elif isinstance(m, nn.Linear): + nn.init.xavier_uniform_(m.weight) + if m.bias is not None: + nn.init.constant_(m.bias, 0) + + +def initialize_head(module): + for m in module.modules(): + if isinstance(m, (nn.Linear, nn.Conv2d)): + nn.init.xavier_uniform_(m.weight) + if m.bias is not None: + nn.init.constant_(m.bias, 0) diff --git a/feature_extractor_models/base/model.py b/feature_extractor_models/base/model.py new file mode 100644 index 0000000000000000000000000000000000000000..2724948e8e747296d0e03c9a8d557fd898b86ba5 --- /dev/null +++ b/feature_extractor_models/base/model.py @@ -0,0 +1,71 @@ +import torch + +from . import initialization as init +from .hub_mixin import SMPHubMixin +import torch.nn as nn + +class SegmentationModel(torch.nn.Module, SMPHubMixin): + def initialize(self): + + # self.out = nn.Sequential( + # nn.Conv2d(out_channels, out_channels, 3, padding=1, bias=False), + # nn.BatchNorm2d(8), + # nn.ReLU(inplace=True), + # ) + init.initialize_decoder(self.decoder) + init.initialize_head(self.segmentation_head) + if self.classification_head is not None: + init.initialize_head(self.classification_head) + + def check_input_shape(self, x): + h, w = x.shape[-2:] + output_stride = self.encoder.output_stride + if h % output_stride != 0 or w % output_stride != 0: + new_h = ( + (h // output_stride + 1) * output_stride + if h % output_stride != 0 + else h + ) + new_w = ( + (w // output_stride + 1) * output_stride + if w % output_stride != 0 + else w + ) + raise RuntimeError( + f"Wrong input shape height={h}, width={w}. Expected image height and width " + f"divisible by {output_stride}. Consider pad your images to shape ({new_h}, {new_w})." + ) + + def forward(self, x): + """Sequentially pass `x` trough model`s encoder, decoder and heads""" + + self.check_input_shape(x) + + features = self.encoder(x) + decoder_output = self.decoder(*features) + + decoder_output = self.segmentation_head(decoder_output) + # + # if self.classification_head is not None: + # labels = self.classification_head(features[-1]) + # return masks, labels + + return decoder_output + + @torch.no_grad() + def predict(self, x): + """Inference method. 
Switch model to `eval` mode, call `.forward(x)` with `torch.no_grad()` + + Args: + x: 4D torch tensor with shape (batch_size, channels, height, width) + + Return: + prediction: 4D torch tensor with shape (batch_size, classes, height, width) + + """ + if self.training: + self.eval() + + x = self.forward(x) + + return x diff --git a/feature_extractor_models/base/modules.py b/feature_extractor_models/base/modules.py new file mode 100644 index 0000000000000000000000000000000000000000..51c1400016cf519b57a31d7c602b0e7a11a9a670 --- /dev/null +++ b/feature_extractor_models/base/modules.py @@ -0,0 +1,131 @@ +import torch +import torch.nn as nn + +try: + from inplace_abn import InPlaceABN +except ImportError: + InPlaceABN = None + + +class Conv2dReLU(nn.Sequential): + def __init__( + self, + in_channels, + out_channels, + kernel_size, + padding=0, + stride=1, + use_batchnorm=True, + ): + if use_batchnorm == "inplace" and InPlaceABN is None: + raise RuntimeError( + "In order to use `use_batchnorm='inplace'` inplace_abn package must be installed. " + + "To install see: https://github.com/mapillary/inplace_abn" + ) + + conv = nn.Conv2d( + in_channels, + out_channels, + kernel_size, + stride=stride, + padding=padding, + bias=not (use_batchnorm), + ) + relu = nn.ReLU(inplace=True) + + if use_batchnorm == "inplace": + bn = InPlaceABN(out_channels, activation="leaky_relu", activation_param=0.0) + relu = nn.Identity() + + elif use_batchnorm and use_batchnorm != "inplace": + bn = nn.BatchNorm2d(out_channels) + + else: + bn = nn.Identity() + + super(Conv2dReLU, self).__init__(conv, bn, relu) + + +class SCSEModule(nn.Module): + def __init__(self, in_channels, reduction=16): + super().__init__() + self.cSE = nn.Sequential( + nn.AdaptiveAvgPool2d(1), + nn.Conv2d(in_channels, in_channels // reduction, 1), + nn.ReLU(inplace=True), + nn.Conv2d(in_channels // reduction, in_channels, 1), + nn.Sigmoid(), + ) + self.sSE = nn.Sequential(nn.Conv2d(in_channels, 1, 1), nn.Sigmoid()) + + def forward(self, x): + return x * self.cSE(x) + x * self.sSE(x) + + +class ArgMax(nn.Module): + def __init__(self, dim=None): + super().__init__() + self.dim = dim + + def forward(self, x): + return torch.argmax(x, dim=self.dim) + + +class Clamp(nn.Module): + def __init__(self, min=0, max=1): + super().__init__() + self.min, self.max = min, max + + def forward(self, x): + return torch.clamp(x, self.min, self.max) + + +class Activation(nn.Module): + def __init__(self, name, **params): + super().__init__() + + if name is None or name == "identity": + self.activation = nn.Identity(**params) + elif name == "sigmoid": + self.activation = nn.Sigmoid() + elif name == "relu": + self.activation = nn.ReLU(inplace=True) + elif name == "softmax2d": + self.activation = nn.Softmax(dim=1, **params) + elif name == "softmax": + self.activation = nn.Softmax(**params) + elif name == "logsoftmax": + self.activation = nn.LogSoftmax(**params) + elif name == "tanh": + self.activation = nn.Tanh() + elif name == "argmax": + self.activation = ArgMax(**params) + elif name == "argmax2d": + self.activation = ArgMax(dim=1, **params) + elif name == "clamp": + self.activation = Clamp(**params) + elif callable(name): + self.activation = name(**params) + else: + raise ValueError( + f"Activation should be callable/sigmoid/softmax/logsoftmax/tanh/" + f"argmax/argmax2d/clamp/None; got {name}" + ) + + def forward(self, x): + return self.activation(x) + + +class Attention(nn.Module): + def __init__(self, name, **params): + super().__init__() + + if name is None: + 
self.attention = nn.Identity(**params) + elif name == "scse": + self.attention = SCSEModule(**params) + else: + raise ValueError("Attention {} is not implemented".format(name)) + + def forward(self, x): + return self.attention(x) diff --git a/feature_extractor_models/decoders/__init__.py b/feature_extractor_models/decoders/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/feature_extractor_models/decoders/deeplabv3/__init__.py b/feature_extractor_models/decoders/deeplabv3/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..723fa4cc4cd98bf941d37e622dbc2fac5d5938a8 --- /dev/null +++ b/feature_extractor_models/decoders/deeplabv3/__init__.py @@ -0,0 +1,3 @@ +from .model import DeepLabV3, DeepLabV3Plus + +__all__ = ["DeepLabV3", "DeepLabV3Plus"] diff --git a/feature_extractor_models/decoders/deeplabv3/decoder.py b/feature_extractor_models/decoders/deeplabv3/decoder.py new file mode 100644 index 0000000000000000000000000000000000000000..2bec43c98d3c7d002ea357e48682d8a9af48b145 --- /dev/null +++ b/feature_extractor_models/decoders/deeplabv3/decoder.py @@ -0,0 +1,220 @@ +""" +BSD 3-Clause License + +Copyright (c) Soumith Chintala 2016, +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+""" + +import torch +from torch import nn +from torch.nn import functional as F + +__all__ = ["DeepLabV3Decoder"] + + +class DeepLabV3Decoder(nn.Sequential): + def __init__(self, in_channels, out_channels=256, atrous_rates=(12, 24, 36)): + super().__init__( + ASPP(in_channels, out_channels, atrous_rates), + nn.Conv2d(out_channels, out_channels, 3, padding=1, bias=False), + nn.BatchNorm2d(out_channels), + nn.ReLU(), + ) + self.out_channels = out_channels + + def forward(self, *features): + return super().forward(features[-1]) + + +class DeepLabV3PlusDecoder(nn.Module): + def __init__( + self, + encoder_channels, + out_channels=256, + atrous_rates=(12, 24, 36), + output_stride=16, + ): + super().__init__() + if output_stride not in {8, 16}: + raise ValueError( + "Output stride should be 8 or 16, got {}.".format(output_stride) + ) + + self.out_channels = out_channels + self.output_stride = output_stride + + self.aspp = nn.Sequential( + ASPP(encoder_channels[-1], out_channels, atrous_rates, separable=True), + SeparableConv2d( + out_channels, out_channels, kernel_size=3, padding=1, bias=False + ), + nn.BatchNorm2d(out_channels), + nn.ReLU(), + ) + + scale_factor = 2 if output_stride == 8 else 4 + self.up = nn.UpsamplingBilinear2d(scale_factor=scale_factor) + + highres_in_channels = encoder_channels[-4] + highres_out_channels = 48 # proposed by authors of paper + self.block1 = nn.Sequential( + nn.Conv2d( + highres_in_channels, highres_out_channels, kernel_size=1, bias=False + ), + nn.BatchNorm2d(highres_out_channels), + nn.ReLU(), + ) + self.block2 = nn.Sequential( + SeparableConv2d( + highres_out_channels + out_channels, + out_channels, + kernel_size=3, + padding=1, + bias=False, + ), + nn.BatchNorm2d(out_channels), + nn.ReLU(), + ) + + def forward(self, *features): + aspp_features = self.aspp(features[-1]) + aspp_features = self.up(aspp_features) + high_res_features = self.block1(features[-4]) + concat_features = torch.cat([aspp_features, high_res_features], dim=1) + fused_features = self.block2(concat_features) + return fused_features + + +class ASPPConv(nn.Sequential): + def __init__(self, in_channels, out_channels, dilation): + super().__init__( + nn.Conv2d( + in_channels, + out_channels, + kernel_size=3, + padding=dilation, + dilation=dilation, + bias=False, + ), + nn.BatchNorm2d(out_channels), + nn.ReLU(), + ) + + +class ASPPSeparableConv(nn.Sequential): + def __init__(self, in_channels, out_channels, dilation): + super().__init__( + SeparableConv2d( + in_channels, + out_channels, + kernel_size=3, + padding=dilation, + dilation=dilation, + bias=False, + ), + nn.BatchNorm2d(out_channels), + nn.ReLU(), + ) + + +class ASPPPooling(nn.Sequential): + def __init__(self, in_channels, out_channels): + super().__init__( + nn.AdaptiveAvgPool2d(1), + nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False), + nn.BatchNorm2d(out_channels), + nn.ReLU(), + ) + + def forward(self, x): + size = x.shape[-2:] + for mod in self: + x = mod(x) + return F.interpolate(x, size=size, mode="bilinear", align_corners=False) + + +class ASPP(nn.Module): + def __init__(self, in_channels, out_channels, atrous_rates, separable=False): + super(ASPP, self).__init__() + modules = [] + modules.append( + nn.Sequential( + nn.Conv2d(in_channels, out_channels, 1, bias=False), + nn.BatchNorm2d(out_channels), + nn.ReLU(), + ) + ) + + rate1, rate2, rate3 = tuple(atrous_rates) + ASPPConvModule = ASPPConv if not separable else ASPPSeparableConv + + modules.append(ASPPConvModule(in_channels, out_channels, rate1)) + 
modules.append(ASPPConvModule(in_channels, out_channels, rate2)) + modules.append(ASPPConvModule(in_channels, out_channels, rate3)) + modules.append(ASPPPooling(in_channels, out_channels)) + + self.convs = nn.ModuleList(modules) + + self.project = nn.Sequential( + nn.Conv2d(5 * out_channels, out_channels, kernel_size=1, bias=False), + nn.BatchNorm2d(out_channels), + nn.ReLU(), + nn.Dropout(0.5), + ) + + def forward(self, x): + res = [] + for conv in self.convs: + res.append(conv(x)) + res = torch.cat(res, dim=1) + return self.project(res) + + +class SeparableConv2d(nn.Sequential): + def __init__( + self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + bias=True, + ): + dephtwise_conv = nn.Conv2d( + in_channels, + in_channels, + kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + groups=in_channels, + bias=False, + ) + pointwise_conv = nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=bias) + super().__init__(dephtwise_conv, pointwise_conv) diff --git a/feature_extractor_models/decoders/deeplabv3/model.py b/feature_extractor_models/decoders/deeplabv3/model.py new file mode 100644 index 0000000000000000000000000000000000000000..f23c9167e98c0091d76bf388bcd51d481f1a361e --- /dev/null +++ b/feature_extractor_models/decoders/deeplabv3/model.py @@ -0,0 +1,178 @@ +from typing import Optional + +from feature_extractor_models.base import ( + SegmentationModel, + SegmentationHead, + ClassificationHead, +) +from feature_extractor_models.encoders import get_encoder +from .decoder import DeepLabV3Decoder, DeepLabV3PlusDecoder + + +class DeepLabV3(SegmentationModel): + """DeepLabV3_ implementation from "Rethinking Atrous Convolution for Semantic Image Segmentation" + + Args: + encoder_name: Name of the classification model that will be used as an encoder (a.k.a backbone) + to extract features of different spatial resolution + encoder_depth: A number of stages used in encoder in range [3, 5]. Each stage generate features + two times smaller in spatial dimensions than previous one (e.g. for depth 0 we will have features + with shapes [(N, C, H, W),], for depth 1 - [(N, C, H, W), (N, C, H // 2, W // 2)] and so on). + Default is 5 + encoder_weights: One of **None** (random initialization), **"imagenet"** (pre-training on ImageNet) and + other pretrained weights (see table with available weights for each encoder_name) + decoder_channels: A number of convolution filters in ASPP module. Default is 256 + in_channels: A number of input channels for the model, default is 3 (RGB images) + classes: A number of classes for output mask (or you can think as a number of channels of output mask) + activation: An activation function to apply after the final convolution layer. + Available options are **"sigmoid"**, **"softmax"**, **"logsoftmax"**, **"tanh"**, **"identity"**, + **callable** and **None**. + Default is **None** + upsampling: Final upsampling factor. Default is 8 to preserve input-output spatial shape identity + aux_params: Dictionary with parameters of the auxiliary output (classification head). Auxiliary output is build + on top of encoder if **aux_params** is not **None** (default). Supported params: + - classes (int): A number of classes + - pooling (str): One of "max", "avg". Default is "avg" + - dropout (float): Dropout factor in [0, 1) + - activation (str): An activation function to apply "sigmoid"/"softmax" + (could be **None** to return logits) + Returns: + ``torch.nn.Module``: **DeepLabV3** + + .. 
_DeeplabV3: + https://arxiv.org/abs/1706.05587 + + """ + + def __init__( + self, + encoder_name: str = "resnet34", + encoder_depth: int = 5, + encoder_weights: Optional[str] = "imagenet", + decoder_channels: int = 256, + in_channels: int = 3, + classes: int = 1, + activation: Optional[str] = None, + upsampling: int = 8, + aux_params: Optional[dict] = None, + ): + super().__init__() + + self.encoder = get_encoder( + encoder_name, + in_channels=in_channels, + depth=encoder_depth, + weights=encoder_weights, + output_stride=8, + ) + + self.decoder = DeepLabV3Decoder( + in_channels=self.encoder.out_channels[-1], out_channels=decoder_channels + ) + + self.segmentation_head = SegmentationHead( + in_channels=self.decoder.out_channels, + out_channels=classes, + activation=activation, + kernel_size=1, + upsampling=upsampling, + ) + + if aux_params is not None: + self.classification_head = ClassificationHead( + in_channels=self.encoder.out_channels[-1], **aux_params + ) + else: + self.classification_head = None + + +class DeepLabV3Plus(SegmentationModel): + """DeepLabV3+ implementation from "Encoder-Decoder with Atrous Separable + Convolution for Semantic Image Segmentation" + + Args: + encoder_name: Name of the classification model that will be used as an encoder (a.k.a backbone) + to extract features of different spatial resolution + encoder_depth: A number of stages used in encoder in range [3, 5]. Each stage generate features + two times smaller in spatial dimensions than previous one (e.g. for depth 0 we will have features + with shapes [(N, C, H, W),], for depth 1 - [(N, C, H, W), (N, C, H // 2, W // 2)] and so on). + Default is 5 + encoder_weights: One of **None** (random initialization), **"imagenet"** (pre-training on ImageNet) and + other pretrained weights (see table with available weights for each encoder_name) + encoder_output_stride: Downsampling factor for last encoder features (see original paper for explanation) + decoder_atrous_rates: Dilation rates for ASPP module (should be a tuple of 3 integer values) + decoder_channels: A number of convolution filters in ASPP module. Default is 256 + in_channels: A number of input channels for the model, default is 3 (RGB images) + classes: A number of classes for output mask (or you can think as a number of channels of output mask) + activation: An activation function to apply after the final convolution layer. + Available options are **"sigmoid"**, **"softmax"**, **"logsoftmax"**, **"tanh"**, **"identity"**, + **callable** and **None**. + Default is **None** + upsampling: Final upsampling factor. Default is 4 to preserve input-output spatial shape identity + aux_params: Dictionary with parameters of the auxiliary output (classification head). Auxiliary output is build + on top of encoder if **aux_params** is not **None** (default). Supported params: + - classes (int): A number of classes + - pooling (str): One of "max", "avg". 
Default is "avg" + - dropout (float): Dropout factor in [0, 1) + - activation (str): An activation function to apply "sigmoid"/"softmax" + (could be **None** to return logits) + Returns: + ``torch.nn.Module``: **DeepLabV3Plus** + + Reference: + https://arxiv.org/abs/1802.02611v3 + + """ + + def __init__( + self, + encoder_name: str = "resnet34", + encoder_depth: int = 5, + encoder_weights: Optional[str] = "imagenet", + encoder_output_stride: int = 16, + decoder_channels: int = 256, + decoder_atrous_rates: tuple = (12, 24, 36), + in_channels: int = 3, + classes: int = 1, + activation: Optional[str] = None, + upsampling: int = 4, + aux_params: Optional[dict] = None, + ): + super().__init__() + + if encoder_output_stride not in [8, 16]: + raise ValueError( + "Encoder output stride should be 8 or 16, got {}".format( + encoder_output_stride + ) + ) + + self.encoder = get_encoder( + encoder_name, + in_channels=in_channels, + depth=encoder_depth, + weights=encoder_weights, + output_stride=encoder_output_stride, + ) + + self.decoder = DeepLabV3PlusDecoder( + encoder_channels=self.encoder.out_channels, + out_channels=decoder_channels, + atrous_rates=decoder_atrous_rates, + output_stride=encoder_output_stride, + ) + + self.segmentation_head = SegmentationHead( + in_channels=self.decoder.out_channels, + out_channels=classes, + activation=activation, + kernel_size=1, + upsampling=upsampling, + ) + + if aux_params is not None: + self.classification_head = ClassificationHead( + in_channels=self.encoder.out_channels[-1], **aux_params + ) + else: + self.classification_head = None diff --git a/feature_extractor_models/decoders/fpn/__init__.py b/feature_extractor_models/decoders/fpn/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ad4be9db3bbd683a825e6dd17b84bf72f0228b16 --- /dev/null +++ b/feature_extractor_models/decoders/fpn/__init__.py @@ -0,0 +1,3 @@ +from .model import FPN + +__all__ = ["FPN"] diff --git a/feature_extractor_models/decoders/fpn/decoder.py b/feature_extractor_models/decoders/fpn/decoder.py new file mode 100644 index 0000000000000000000000000000000000000000..766190f45c32c6f9e4eb6d03632585c339fb81fa --- /dev/null +++ b/feature_extractor_models/decoders/fpn/decoder.py @@ -0,0 +1,133 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class Conv3x3GNReLU(nn.Module): + def __init__(self, in_channels, out_channels, upsample=False): + super().__init__() + self.upsample = upsample + self.block = nn.Sequential( + nn.Conv2d( + in_channels, out_channels, (3, 3), stride=1, padding=1, bias=False + ), + nn.GroupNorm(32, out_channels), + nn.ReLU(inplace=True), + ) + + def forward(self, x): + x = self.block(x) + if self.upsample: + x = F.interpolate(x, scale_factor=2, mode="bilinear", align_corners=True) + return x + + +class FPNBlock(nn.Module): + def __init__(self, pyramid_channels, skip_channels): + super().__init__() + self.skip_conv = nn.Conv2d(skip_channels, pyramid_channels, kernel_size=1) + + def forward(self, x, skip=None): + x = F.interpolate(x, scale_factor=2, mode="nearest") + skip = self.skip_conv(skip) + x = x + skip + return x + + +class SegmentationBlock(nn.Module): + def __init__(self, in_channels, out_channels, n_upsamples=0): + super().__init__() + + blocks = [Conv3x3GNReLU(in_channels, out_channels, upsample=bool(n_upsamples))] + + if n_upsamples > 1: + for _ in range(1, n_upsamples): + blocks.append(Conv3x3GNReLU(out_channels, out_channels, upsample=True)) + + self.block = nn.Sequential(*blocks) + + def forward(self, x): + 
return self.block(x) + + +class MergeBlock(nn.Module): + def __init__(self, policy): + super().__init__() + if policy not in ["add", "cat"]: + raise ValueError( + "`merge_policy` must be one of: ['add', 'cat'], got {}".format(policy) + ) + self.policy = policy + + def forward(self, x): + if self.policy == "add": + return sum(x) + elif self.policy == "cat": + return torch.cat(x, dim=1) + else: + raise ValueError( + "`merge_policy` must be one of: ['add', 'cat'], got {}".format( + self.policy + ) + ) + + +class FPNDecoder(nn.Module): + def __init__( + self, + encoder_channels, + encoder_depth=5, + pyramid_channels=256, + segmentation_channels=128, + dropout=0.2, + merge_policy="add", + ): + super().__init__() + + self.out_channels = ( + segmentation_channels + if merge_policy == "add" + else segmentation_channels * 4 + ) + if encoder_depth < 3: + raise ValueError( + "Encoder depth for FPN decoder cannot be less than 3, got {}.".format( + encoder_depth + ) + ) + + encoder_channels = encoder_channels[::-1] + encoder_channels = encoder_channels[: encoder_depth + 1] + + self.p5 = nn.Conv2d(encoder_channels[0], pyramid_channels, kernel_size=1) + self.p4 = FPNBlock(pyramid_channels, encoder_channels[1]) + self.p3 = FPNBlock(pyramid_channels, encoder_channels[2]) + self.p2 = FPNBlock(pyramid_channels, encoder_channels[3]) + + self.seg_blocks = nn.ModuleList( + [ + SegmentationBlock( + pyramid_channels, segmentation_channels, n_upsamples=n_upsamples + ) + for n_upsamples in [3, 2, 1, 0] + ] + ) + + self.merge = MergeBlock(merge_policy) + self.dropout = nn.Dropout2d(p=dropout, inplace=True) + + def forward(self, *features): + c2, c3, c4, c5 = features[-4:] + + p5 = self.p5(c5) + p4 = self.p4(p5, c4) + p3 = self.p3(p4, c3) + p2 = self.p2(p3, c2) + + feature_pyramid = [ + seg_block(p) for seg_block, p in zip(self.seg_blocks, [p5, p4, p3, p2]) + ] + x = self.merge(feature_pyramid) + x = self.dropout(x) + + return x diff --git a/feature_extractor_models/decoders/fpn/model.py b/feature_extractor_models/decoders/fpn/model.py new file mode 100644 index 0000000000000000000000000000000000000000..5da47a65185a2dd679bd615bfada0fa757873ad6 --- /dev/null +++ b/feature_extractor_models/decoders/fpn/model.py @@ -0,0 +1,107 @@ +from typing import Optional + +from feature_extractor_models.base import ( + SegmentationModel, + SegmentationHead, + ClassificationHead, +) +from feature_extractor_models.encoders import get_encoder +from .decoder import FPNDecoder + + +class FPN(SegmentationModel): + """FPN_ is a fully convolution neural network for image semantic segmentation. + + Args: + encoder_name: Name of the classification model that will be used as an encoder (a.k.a backbone) + to extract features of different spatial resolution + encoder_depth: A number of stages used in encoder in range [3, 5]. Each stage generate features + two times smaller in spatial dimensions than previous one (e.g. for depth 0 we will have features + with shapes [(N, C, H, W),], for depth 1 - [(N, C, H, W), (N, C, H // 2, W // 2)] and so on). + Default is 5 + encoder_weights: One of **None** (random initialization), **"imagenet"** (pre-training on ImageNet) and + other pretrained weights (see table with available weights for each encoder_name) + decoder_pyramid_channels: A number of convolution filters in Feature Pyramid of FPN_ + decoder_segmentation_channels: A number of convolution filters in segmentation blocks of FPN_ + decoder_merge_policy: Determines how to merge pyramid features inside FPN. 
Available options are **add** + and **cat** + decoder_dropout: Spatial dropout rate in range (0, 1) for feature pyramid in FPN_ + in_channels: A number of input channels for the model, default is 3 (RGB images) + classes: A number of classes for output mask (or you can think as a number of channels of output mask) + activation: An activation function to apply after the final convolution layer. + Available options are **"sigmoid"**, **"softmax"**, **"logsoftmax"**, **"tanh"**, **"identity"**, + **callable** and **None**. + Default is **None** + upsampling: Final upsampling factor. Default is 4 to preserve input-output spatial shape identity + aux_params: Dictionary with parameters of the auxiliary output (classification head). Auxiliary output is build + on top of encoder if **aux_params** is not **None** (default). Supported params: + - classes (int): A number of classes + - pooling (str): One of "max", "avg". Default is "avg" + - dropout (float): Dropout factor in [0, 1) + - activation (str): An activation function to apply "sigmoid"/"softmax" + (could be **None** to return logits) + + Returns: + ``torch.nn.Module``: **FPN** + + .. _FPN: + http://presentations.cocodataset.org/COCO17-Stuff-FAIR.pdf + + """ + + def __init__( + self, + encoder_name: str = "resnet34", + encoder_depth: int = 5, + encoder_weights: Optional[str] = "imagenet", + decoder_pyramid_channels: int = 256, + decoder_segmentation_channels: int = 128, + decoder_merge_policy: str = "add", + decoder_dropout: float = 0.2, + in_channels: int = 3, + classes: int = 1, + activation: Optional[str] = None, + upsampling: int = 4, + aux_params: Optional[dict] = None, + ): + super().__init__() + + # validate input params + if encoder_name.startswith("mit_b") and encoder_depth != 5: + raise ValueError( + "Encoder {} support only encoder_depth=5".format(encoder_name) + ) + + self.encoder = get_encoder( + encoder_name, + in_channels=in_channels, + depth=encoder_depth, + weights=encoder_weights, + ) + + self.decoder = FPNDecoder( + encoder_channels=self.encoder.out_channels, + encoder_depth=encoder_depth, + pyramid_channels=decoder_pyramid_channels, + segmentation_channels=decoder_segmentation_channels, + dropout=decoder_dropout, + merge_policy=decoder_merge_policy, + ) + + self.segmentation_head = SegmentationHead( + in_channels=self.decoder.out_channels, + out_channels=classes, + activation=activation, + kernel_size=1, + upsampling=upsampling, + ) + + if aux_params is not None: + self.classification_head = ClassificationHead( + in_channels=self.encoder.out_channels[-1], **aux_params + ) + else: + self.classification_head = None + + self.name = "fpn-{}".format(encoder_name) + self.initialize() diff --git a/feature_extractor_models/decoders/lightfpn/__init__.py b/feature_extractor_models/decoders/lightfpn/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..c9741e035896abe645de6760c1b756f7f7bbe761 --- /dev/null +++ b/feature_extractor_models/decoders/lightfpn/__init__.py @@ -0,0 +1,3 @@ +from .model import LightFPN + +__all__ = ["LightFPN"] diff --git a/feature_extractor_models/decoders/lightfpn/decoder.py b/feature_extractor_models/decoders/lightfpn/decoder.py new file mode 100644 index 0000000000000000000000000000000000000000..f19d5ae408026f8ae08c8b69d37aa61578b7278c --- /dev/null +++ b/feature_extractor_models/decoders/lightfpn/decoder.py @@ -0,0 +1,144 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class Conv3x3GNReLU(nn.Module): + def __init__(self, in_channels, out_channels, 
upsample=False): + super().__init__() + self.upsample = upsample + self.block = nn.Sequential( + nn.Conv2d( + in_channels, out_channels, (3, 3), stride=1, padding=1, bias=False + ), + nn.GroupNorm(32, out_channels), + nn.ReLU(inplace=True), + ) + + def forward(self, x): + x = self.block(x) + if self.upsample: + x = F.interpolate(x, scale_factor=2, mode="bilinear", align_corners=True) + return x + + +class DepthwiseSeparableConv2d(nn.Module): + def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0): + super().__init__() + self.depthwise = nn.Conv2d(in_channels, in_channels, kernel_size, stride, padding, groups=in_channels) + self.pointwise = nn.Conv2d(in_channels, out_channels, 1) + + def forward(self, x): + x = self.depthwise(x) + x = self.pointwise(x) + return x + +class LightFPNBlock(nn.Module): + def __init__(self, pyramid_channels, skip_channels): + super().__init__() + self.skip_conv = DepthwiseSeparableConv2d(skip_channels, pyramid_channels, kernel_size=1) + + def forward(self, x, skip=None): + x = F.interpolate(x, scale_factor=2, mode="nearest") + skip = self.skip_conv(skip) + x = x + skip + return x + + +class SegmentationBlock(nn.Module): + def __init__(self, in_channels, out_channels, n_upsamples=0): + super().__init__() + + blocks = [Conv3x3GNReLU(in_channels, out_channels, upsample=bool(n_upsamples))] + + if n_upsamples > 1: + for _ in range(1, n_upsamples): + blocks.append(Conv3x3GNReLU(out_channels, out_channels, upsample=True)) + + self.block = nn.Sequential(*blocks) + + def forward(self, x): + return self.block(x) + + +class MergeBlock(nn.Module): + def __init__(self, policy): + super().__init__() + if policy not in ["add", "cat"]: + raise ValueError( + "`merge_policy` must be one of: ['add', 'cat'], got {}".format(policy) + ) + self.policy = policy + + def forward(self, x): + if self.policy == "add": + return sum(x) + elif self.policy == "cat": + return torch.cat(x, dim=1) + else: + raise ValueError( + "`merge_policy` must be one of: ['add', 'cat'], got {}".format( + self.policy + ) + ) + + +class FPNDecoder(nn.Module): + def __init__( + self, + encoder_channels, + encoder_depth=5, + pyramid_channels=256, + segmentation_channels=128, + dropout=0.2, + merge_policy="add", + ): + super().__init__() + + self.out_channels = ( + segmentation_channels + if merge_policy == "add" + else segmentation_channels * 4 + ) + if encoder_depth < 3: + raise ValueError( + "Encoder depth for FPN decoder cannot be less than 3, got {}.".format( + encoder_depth + ) + ) + + encoder_channels = encoder_channels[::-1] + encoder_channels = encoder_channels[: encoder_depth + 1] + + self.p5 = nn.Conv2d(encoder_channels[0], pyramid_channels, kernel_size=1) + self.p4 = LightFPNBlock(pyramid_channels, encoder_channels[1]) + self.p3 = LightFPNBlock(pyramid_channels, encoder_channels[2]) + self.p2 = LightFPNBlock(pyramid_channels, encoder_channels[3]) + + self.seg_blocks = nn.ModuleList( + [ + SegmentationBlock( + pyramid_channels, segmentation_channels, n_upsamples=n_upsamples + ) + for n_upsamples in [3, 2, 1, 0] + ] + ) + + self.merge = MergeBlock(merge_policy) + self.dropout = nn.Dropout2d(p=dropout, inplace=True) + + def forward(self, *features): + c2, c3, c4, c5 = features[-4:] + + p5 = self.p5(c5) + p4 = self.p4(p5, c4) + p3 = self.p3(p4, c3) + p2 = self.p2(p3, c2) + + feature_pyramid = [ + seg_block(p) for seg_block, p in zip(self.seg_blocks, [p5, p4, p3, p2]) + ] + x = self.merge(feature_pyramid) + x = self.dropout(x) + + return x diff --git 
a/feature_extractor_models/decoders/lightfpn/model.py b/feature_extractor_models/decoders/lightfpn/model.py new file mode 100644 index 0000000000000000000000000000000000000000..da19ea13add202458387ee40b55b82ed5c309dca --- /dev/null +++ b/feature_extractor_models/decoders/lightfpn/model.py @@ -0,0 +1,107 @@ +from typing import Optional + +from feature_extractor_models.base import ( + SegmentationModel, + SegmentationHead, + ClassificationHead, +) +from feature_extractor_models.encoders import get_encoder +from .decoder import FPNDecoder + + +class LightFPN(SegmentationModel): + """FPN_ is a fully convolution neural network for image semantic segmentation. + + Args: + encoder_name: Name of the classification model that will be used as an encoder (a.k.a backbone) + to extract features of different spatial resolution + encoder_depth: A number of stages used in encoder in range [3, 5]. Each stage generate features + two times smaller in spatial dimensions than previous one (e.g. for depth 0 we will have features + with shapes [(N, C, H, W),], for depth 1 - [(N, C, H, W), (N, C, H // 2, W // 2)] and so on). + Default is 5 + encoder_weights: One of **None** (random initialization), **"imagenet"** (pre-training on ImageNet) and + other pretrained weights (see table with available weights for each encoder_name) + decoder_pyramid_channels: A number of convolution filters in Feature Pyramid of FPN_ + decoder_segmentation_channels: A number of convolution filters in segmentation blocks of FPN_ + decoder_merge_policy: Determines how to merge pyramid features inside FPN. Available options are **add** + and **cat** + decoder_dropout: Spatial dropout rate in range (0, 1) for feature pyramid in FPN_ + in_channels: A number of input channels for the model, default is 3 (RGB images) + classes: A number of classes for output mask (or you can think as a number of channels of output mask) + activation: An activation function to apply after the final convolution layer. + Available options are **"sigmoid"**, **"softmax"**, **"logsoftmax"**, **"tanh"**, **"identity"**, + **callable** and **None**. + Default is **None** + upsampling: Final upsampling factor. Default is 4 to preserve input-output spatial shape identity + aux_params: Dictionary with parameters of the auxiliary output (classification head). Auxiliary output is build + on top of encoder if **aux_params** is not **None** (default). Supported params: + - classes (int): A number of classes + - pooling (str): One of "max", "avg". Default is "avg" + - dropout (float): Dropout factor in [0, 1) + - activation (str): An activation function to apply "sigmoid"/"softmax" + (could be **None** to return logits) + + Returns: + ``torch.nn.Module``: **FPN** + + .. 
_FPN: + http://presentations.cocodataset.org/COCO17-Stuff-FAIR.pdf + + """ + + def __init__( + self, + encoder_name: str = "resnet34", + encoder_depth: int = 5, + encoder_weights: Optional[str] = "imagenet", + decoder_pyramid_channels: int = 256, + decoder_segmentation_channels: int = 128, + decoder_merge_policy: str = "add", + decoder_dropout: float = 0.2, + in_channels: int = 3, + classes: int = 1, + activation: Optional[str] = None, + upsampling: int = 4, + aux_params: Optional[dict] = None, + ): + super().__init__() + + # validate input params + if encoder_name.startswith("mit_b") and encoder_depth != 5: + raise ValueError( + "Encoder {} support only encoder_depth=5".format(encoder_name) + ) + + self.encoder = get_encoder( + encoder_name, + in_channels=in_channels, + depth=encoder_depth, + weights=encoder_weights, + ) + + self.decoder = FPNDecoder( + encoder_channels=self.encoder.out_channels, + encoder_depth=encoder_depth, + pyramid_channels=decoder_pyramid_channels, + segmentation_channels=decoder_segmentation_channels, + dropout=decoder_dropout, + merge_policy=decoder_merge_policy, + ) + + self.segmentation_head = SegmentationHead( + in_channels=self.decoder.out_channels, + out_channels=classes, + activation=activation, + kernel_size=1, + upsampling=upsampling, + ) + + if aux_params is not None: + self.classification_head = ClassificationHead( + in_channels=self.encoder.out_channels[-1], **aux_params + ) + else: + self.classification_head = None + + self.name = "fpn-{}".format(encoder_name) + self.initialize() diff --git a/feature_extractor_models/decoders/linknet/__init__.py b/feature_extractor_models/decoders/linknet/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..0e662f80411191b67a1e579efc7e14ef9f15db09 --- /dev/null +++ b/feature_extractor_models/decoders/linknet/__init__.py @@ -0,0 +1,3 @@ +from .model import Linknet + +__all__ = ["Linknet"] diff --git a/feature_extractor_models/decoders/linknet/decoder.py b/feature_extractor_models/decoders/linknet/decoder.py new file mode 100644 index 0000000000000000000000000000000000000000..c70b9c9310373734749c7554780d794305430bd0 --- /dev/null +++ b/feature_extractor_models/decoders/linknet/decoder.py @@ -0,0 +1,82 @@ +import torch.nn as nn + +from feature_extractor_models.base import modules + + +class TransposeX2(nn.Sequential): + def __init__(self, in_channels, out_channels, use_batchnorm=True): + super().__init__() + layers = [ + nn.ConvTranspose2d( + in_channels, out_channels, kernel_size=4, stride=2, padding=1 + ), + nn.ReLU(inplace=True), + ] + + if use_batchnorm: + layers.insert(1, nn.BatchNorm2d(out_channels)) + + super().__init__(*layers) + + +class DecoderBlock(nn.Module): + def __init__(self, in_channels, out_channels, use_batchnorm=True): + super().__init__() + + self.block = nn.Sequential( + modules.Conv2dReLU( + in_channels, + in_channels // 4, + kernel_size=1, + use_batchnorm=use_batchnorm, + ), + TransposeX2( + in_channels // 4, in_channels // 4, use_batchnorm=use_batchnorm + ), + modules.Conv2dReLU( + in_channels // 4, + out_channels, + kernel_size=1, + use_batchnorm=use_batchnorm, + ), + ) + + def forward(self, x, skip=None): + x = self.block(x) + if skip is not None: + x = x + skip + return x + + +class LinknetDecoder(nn.Module): + def __init__( + self, encoder_channels, prefinal_channels=32, n_blocks=5, use_batchnorm=True + ): + super().__init__() + + # remove first skip + encoder_channels = encoder_channels[1:] + # reverse channels to start from head of encoder + encoder_channels = 
encoder_channels[::-1] + + channels = list(encoder_channels) + [prefinal_channels] + + self.blocks = nn.ModuleList( + [ + DecoderBlock(channels[i], channels[i + 1], use_batchnorm=use_batchnorm) + for i in range(n_blocks) + ] + ) + + def forward(self, *features): + features = features[1:] # remove first skip + features = features[::-1] # reverse channels to start from head of encoder + + x = features[0] + skips = features[1:] + + for i, decoder_block in enumerate(self.blocks): + skip = skips[i] if i < len(skips) else None + x = decoder_block(x, skip) + + return x diff --git a/feature_extractor_models/decoders/linknet/model.py b/feature_extractor_models/decoders/linknet/model.py new file mode 100644 index 0000000000000000000000000000000000000000..b73af76157d5325d4b0cb83c06ca17d3ad4a8727 --- /dev/null +++ b/feature_extractor_models/decoders/linknet/model.py @@ -0,0 +1,98 @@ +from typing import Optional, Union + +from feature_extractor_models.base import ( + SegmentationHead, + SegmentationModel, + ClassificationHead, +) +from feature_extractor_models.encoders import get_encoder +from .decoder import LinknetDecoder + + +class Linknet(SegmentationModel): + """Linknet_ is a fully convolution neural network for image semantic segmentation. Consist of *encoder* + and *decoder* parts connected with *skip connections*. Encoder extract features of different spatial + resolution (skip connections) which are used by decoder to define accurate segmentation mask. Use *sum* + for fusing decoder blocks with skip connections. + + Note: + This implementation by default has 4 skip connections (original - 3). + + Args: + encoder_name: Name of the classification model that will be used as an encoder (a.k.a backbone) + to extract features of different spatial resolution + encoder_depth: A number of stages used in encoder in range [3, 5]. Each stage generate features + two times smaller in spatial dimensions than previous one (e.g. for depth 0 we will have features + with shapes [(N, C, H, W),], for depth 1 - [(N, C, H, W), (N, C, H // 2, W // 2)] and so on). + Default is 5 + encoder_weights: One of **None** (random initialization), **"imagenet"** (pre-training on ImageNet) and + other pretrained weights (see table with available weights for each encoder_name) + decoder_use_batchnorm: If **True**, BatchNorm2d layer between Conv2D and Activation layers + is used. If **"inplace"** InplaceABN will be used, allows to decrease memory consumption. + Available options are **True, False, "inplace"** + in_channels: A number of input channels for the model, default is 3 (RGB images) + classes: A number of classes for output mask (or you can think as a number of channels of output mask) + activation: An activation function to apply after the final convolution layer. + Available options are **"sigmoid"**, **"softmax"**, **"logsoftmax"**, **"tanh"**, **"identity"**, + **callable** and **None**. + Default is **None** + aux_params: Dictionary with parameters of the auxiliary output (classification head). Auxiliary output is build + on top of encoder if **aux_params** is not **None** (default). Supported params: + - classes (int): A number of classes + - pooling (str): One of "max", "avg". Default is "avg" + - dropout (float): Dropout factor in [0, 1) + - activation (str): An activation function to apply "sigmoid"/"softmax" + (could be **None** to return logits) + + Returns: + ``torch.nn.Module``: **Linknet** + + .. 
_Linknet: + https://arxiv.org/abs/1707.03718 + """ + + def __init__( + self, + encoder_name: str = "resnet34", + encoder_depth: int = 5, + encoder_weights: Optional[str] = "imagenet", + decoder_use_batchnorm: bool = True, + in_channels: int = 3, + classes: int = 1, + activation: Optional[Union[str, callable]] = None, + aux_params: Optional[dict] = None, + ): + super().__init__() + + if encoder_name.startswith("mit_b"): + raise ValueError( + "Encoder `{}` is not supported for Linknet".format(encoder_name) + ) + + self.encoder = get_encoder( + encoder_name, + in_channels=in_channels, + depth=encoder_depth, + weights=encoder_weights, + ) + + self.decoder = LinknetDecoder( + encoder_channels=self.encoder.out_channels, + n_blocks=encoder_depth, + prefinal_channels=32, + use_batchnorm=decoder_use_batchnorm, + ) + + self.segmentation_head = SegmentationHead( + in_channels=32, out_channels=classes, activation=activation, kernel_size=1 + ) + + if aux_params is not None: + self.classification_head = ClassificationHead( + in_channels=self.encoder.out_channels[-1], **aux_params + ) + else: + self.classification_head = None + + self.name = "link-{}".format(encoder_name) + self.initialize() diff --git a/feature_extractor_models/decoders/manet/__init__.py b/feature_extractor_models/decoders/manet/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a866f2c50d8bb6117edd9f4949e5e114e799dc2b --- /dev/null +++ b/feature_extractor_models/decoders/manet/__init__.py @@ -0,0 +1,3 @@ +from .model import MAnet + +__all__ = ["MAnet"] diff --git a/feature_extractor_models/decoders/manet/decoder.py b/feature_extractor_models/decoders/manet/decoder.py new file mode 100644 index 0000000000000000000000000000000000000000..61d8ccbe5e0afd1792e9efa5b1ba7aacab2b1794 --- /dev/null +++ b/feature_extractor_models/decoders/manet/decoder.py @@ -0,0 +1,187 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +from feature_extractor_models.base import modules as md + + +class PAB(nn.Module): + def __init__(self, in_channels, out_channels, pab_channels=64): + super(PAB, self).__init__() + # Series of 1x1 conv to generate attention feature maps + self.pab_channels = pab_channels + self.in_channels = in_channels + self.top_conv = nn.Conv2d(in_channels, pab_channels, kernel_size=1) + self.center_conv = nn.Conv2d(in_channels, pab_channels, kernel_size=1) + self.bottom_conv = nn.Conv2d(in_channels, in_channels, kernel_size=3, padding=1) + self.map_softmax = nn.Softmax(dim=1) + self.out_conv = nn.Conv2d(in_channels, in_channels, kernel_size=3, padding=1) + + def forward(self, x): + bsize = x.size()[0] + h = x.size()[2] + w = x.size()[3] + x_top = self.top_conv(x) + x_center = self.center_conv(x) + x_bottom = self.bottom_conv(x) + + x_top = x_top.flatten(2) + x_center = x_center.flatten(2).transpose(1, 2) + x_bottom = x_bottom.flatten(2).transpose(1, 2) + + sp_map = torch.matmul(x_center, x_top) + sp_map = self.map_softmax(sp_map.view(bsize, -1)).view(bsize, h * w, h * w) + sp_map = torch.matmul(sp_map, x_bottom) + sp_map = sp_map.reshape(bsize, self.in_channels, h, w) + x = x + sp_map + x = self.out_conv(x) + return x + + +class MFAB(nn.Module): + def __init__( + self, in_channels, skip_channels, out_channels, use_batchnorm=True, reduction=16 + ): + # MFAB is just a modified version of SE-blocks, one for skip, one for input + super(MFAB, self).__init__() + self.hl_conv = nn.Sequential( + md.Conv2dReLU( + in_channels, + in_channels, + kernel_size=3, + padding=1, + 
use_batchnorm=use_batchnorm, + ), + md.Conv2dReLU( + in_channels, skip_channels, kernel_size=1, use_batchnorm=use_batchnorm + ), + ) + reduced_channels = max(1, skip_channels // reduction) + self.SE_ll = nn.Sequential( + nn.AdaptiveAvgPool2d(1), + nn.Conv2d(skip_channels, reduced_channels, 1), + nn.ReLU(inplace=True), + nn.Conv2d(reduced_channels, skip_channels, 1), + nn.Sigmoid(), + ) + self.SE_hl = nn.Sequential( + nn.AdaptiveAvgPool2d(1), + nn.Conv2d(skip_channels, reduced_channels, 1), + nn.ReLU(inplace=True), + nn.Conv2d(reduced_channels, skip_channels, 1), + nn.Sigmoid(), + ) + self.conv1 = md.Conv2dReLU( + skip_channels + + skip_channels, # we transform C-prime form high level to C from skip connection + out_channels, + kernel_size=3, + padding=1, + use_batchnorm=use_batchnorm, + ) + self.conv2 = md.Conv2dReLU( + out_channels, + out_channels, + kernel_size=3, + padding=1, + use_batchnorm=use_batchnorm, + ) + + def forward(self, x, skip=None): + x = self.hl_conv(x) + x = F.interpolate(x, scale_factor=2, mode="nearest") + attention_hl = self.SE_hl(x) + if skip is not None: + attention_ll = self.SE_ll(skip) + attention_hl = attention_hl + attention_ll + x = x * attention_hl + x = torch.cat([x, skip], dim=1) + x = self.conv1(x) + x = self.conv2(x) + return x + + +class DecoderBlock(nn.Module): + def __init__(self, in_channels, skip_channels, out_channels, use_batchnorm=True): + super().__init__() + self.conv1 = md.Conv2dReLU( + in_channels + skip_channels, + out_channels, + kernel_size=3, + padding=1, + use_batchnorm=use_batchnorm, + ) + self.conv2 = md.Conv2dReLU( + out_channels, + out_channels, + kernel_size=3, + padding=1, + use_batchnorm=use_batchnorm, + ) + + def forward(self, x, skip=None): + x = F.interpolate(x, scale_factor=2, mode="nearest") + if skip is not None: + x = torch.cat([x, skip], dim=1) + x = self.conv1(x) + x = self.conv2(x) + return x + + +class MAnetDecoder(nn.Module): + def __init__( + self, + encoder_channels, + decoder_channels, + n_blocks=5, + reduction=16, + use_batchnorm=True, + pab_channels=64, + ): + super().__init__() + + if n_blocks != len(decoder_channels): + raise ValueError( + "Model depth is {}, but you provide `decoder_channels` for {} blocks.".format( + n_blocks, len(decoder_channels) + ) + ) + + # remove first skip with same spatial resolution + encoder_channels = encoder_channels[1:] + + # reverse channels to start from head of encoder + encoder_channels = encoder_channels[::-1] + + # computing blocks input and output channels + head_channels = encoder_channels[0] + in_channels = [head_channels] + list(decoder_channels[:-1]) + skip_channels = list(encoder_channels[1:]) + [0] + out_channels = decoder_channels + + self.center = PAB(head_channels, head_channels, pab_channels=pab_channels) + + # combine decoder keyword arguments + kwargs = dict(use_batchnorm=use_batchnorm) # no attention type here + blocks = [ + MFAB(in_ch, skip_ch, out_ch, reduction=reduction, **kwargs) + if skip_ch > 0 + else DecoderBlock(in_ch, skip_ch, out_ch, **kwargs) + for in_ch, skip_ch, out_ch in zip(in_channels, skip_channels, out_channels) + ] + # for the last we dont have skip connection -> use simple decoder block + self.blocks = nn.ModuleList(blocks) + + def forward(self, *features): + features = features[1:] # remove first skip with same spatial resolution + features = features[::-1] # reverse channels to start from head of encoder + + head = features[0] + skips = features[1:] + + x = self.center(head) + for i, decoder_block in enumerate(self.blocks): + skip = skips[i] 
if i < len(skips) else None + x = decoder_block(x, skip) + + return x diff --git a/feature_extractor_models/decoders/manet/model.py b/feature_extractor_models/decoders/manet/model.py new file mode 100644 index 0000000000000000000000000000000000000000..a2ecac92af47734c23678f4993f7bd6f043ca0f4 --- /dev/null +++ b/feature_extractor_models/decoders/manet/model.py @@ -0,0 +1,102 @@ +from typing import Optional, Union, List + +from feature_extractor_models.encoders import get_encoder +from feature_extractor_models.base import ( + SegmentationModel, + SegmentationHead, + ClassificationHead, +) +from .decoder import MAnetDecoder + + +class MAnet(SegmentationModel): + """MAnet_ : Multi-scale Attention Net. The MA-Net can capture rich contextual dependencies based on + the attention mechanism, using two blocks: + - Position-wise Attention Block (PAB), which captures the spatial dependencies between pixels in a global view + - Multi-scale Fusion Attention Block (MFAB), which captures the channel dependencies between any feature map by + multi-scale semantic feature fusion + + Args: + encoder_name: Name of the classification model that will be used as an encoder (a.k.a backbone) + to extract features of different spatial resolution + encoder_depth: A number of stages used in encoder in range [3, 5]. Each stage generate features + two times smaller in spatial dimensions than previous one (e.g. for depth 0 we will have features + with shapes [(N, C, H, W),], for depth 1 - [(N, C, H, W), (N, C, H // 2, W // 2)] and so on). + Default is 5 + encoder_weights: One of **None** (random initialization), **"imagenet"** (pre-training on ImageNet) and + other pretrained weights (see table with available weights for each encoder_name) + decoder_channels: List of integers which specify **in_channels** parameter for convolutions used in decoder. + Length of the list should be the same as **encoder_depth** + decoder_use_batchnorm: If **True**, BatchNorm2d layer between Conv2D and Activation layers + is used. If **"inplace"** InplaceABN will be used, allows to decrease memory consumption. + Available options are **True, False, "inplace"** + decoder_pab_channels: A number of channels for PAB module in decoder. + Default is 64. + in_channels: A number of input channels for the model, default is 3 (RGB images) + classes: A number of classes for output mask (or you can think as a number of channels of output mask) + activation: An activation function to apply after the final convolution layer. + Available options are **"sigmoid"**, **"softmax"**, **"logsoftmax"**, **"tanh"**, **"identity"**, + **callable** and **None**. + Default is **None** + aux_params: Dictionary with parameters of the auxiliary output (classification head). Auxiliary output is build + on top of encoder if **aux_params** is not **None** (default). Supported params: + - classes (int): A number of classes + - pooling (str): One of "max", "avg". Default is "avg" + - dropout (float): Dropout factor in [0, 1) + - activation (str): An activation function to apply "sigmoid"/"softmax" + (could be **None** to return logits) + + Returns: + ``torch.nn.Module``: **MAnet** + + .. 
_MAnet: + https://ieeexplore.ieee.org/abstract/document/9201310 + + """ + + def __init__( + self, + encoder_name: str = "resnet34", + encoder_depth: int = 5, + encoder_weights: Optional[str] = "imagenet", + decoder_use_batchnorm: bool = True, + decoder_channels: List[int] = (256, 128, 64, 32, 16), + decoder_pab_channels: int = 64, + in_channels: int = 3, + classes: int = 1, + activation: Optional[Union[str, callable]] = None, + aux_params: Optional[dict] = None, + ): + super().__init__() + + self.encoder = get_encoder( + encoder_name, + in_channels=in_channels, + depth=encoder_depth, + weights=encoder_weights, + ) + + self.decoder = MAnetDecoder( + encoder_channels=self.encoder.out_channels, + decoder_channels=decoder_channels, + n_blocks=encoder_depth, + use_batchnorm=decoder_use_batchnorm, + pab_channels=decoder_pab_channels, + ) + + self.segmentation_head = SegmentationHead( + in_channels=decoder_channels[-1], + out_channels=classes, + activation=activation, + kernel_size=3, + ) + + if aux_params is not None: + self.classification_head = ClassificationHead( + in_channels=self.encoder.out_channels[-1], **aux_params + ) + else: + self.classification_head = None + + self.name = "manet-{}".format(encoder_name) + self.initialize() diff --git a/feature_extractor_models/decoders/pan/__init__.py b/feature_extractor_models/decoders/pan/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8710438531d7567b66d486d5537397f6f0ba1253 --- /dev/null +++ b/feature_extractor_models/decoders/pan/__init__.py @@ -0,0 +1,3 @@ +from .model import PAN + +__all__ = ["PAN"] diff --git a/feature_extractor_models/decoders/pan/decoder.py b/feature_extractor_models/decoders/pan/decoder.py new file mode 100644 index 0000000000000000000000000000000000000000..ab8f8675a1b5dc6ed3447aef63caeb7d96331529 --- /dev/null +++ b/feature_extractor_models/decoders/pan/decoder.py @@ -0,0 +1,208 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class ConvBnRelu(nn.Module): + def __init__( + self, + in_channels: int, + out_channels: int, + kernel_size: int, + stride: int = 1, + padding: int = 0, + dilation: int = 1, + groups: int = 1, + bias: bool = True, + add_relu: bool = True, + interpolate: bool = False, + ): + super(ConvBnRelu, self).__init__() + self.conv = nn.Conv2d( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + bias=bias, + groups=groups, + ) + self.add_relu = add_relu + self.interpolate = interpolate + self.bn = nn.BatchNorm2d(out_channels) + self.activation = nn.ReLU(inplace=True) + + def forward(self, x): + x = self.conv(x) + x = self.bn(x) + if self.add_relu: + x = self.activation(x) + if self.interpolate: + x = F.interpolate(x, scale_factor=2, mode="bilinear", align_corners=True) + return x + + +class FPABlock(nn.Module): + def __init__(self, in_channels, out_channels, upscale_mode="bilinear"): + super(FPABlock, self).__init__() + + self.upscale_mode = upscale_mode + if self.upscale_mode == "bilinear": + self.align_corners = True + else: + self.align_corners = False + + # global pooling branch + self.branch1 = nn.Sequential( + nn.AdaptiveAvgPool2d(1), + ConvBnRelu( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1, + stride=1, + padding=0, + ), + ) + + # midddle branch + self.mid = nn.Sequential( + ConvBnRelu( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1, + stride=1, + padding=0, + ) + ) + self.down1 = nn.Sequential( + 
nn.MaxPool2d(kernel_size=2, stride=2), + ConvBnRelu( + in_channels=in_channels, + out_channels=1, + kernel_size=7, + stride=1, + padding=3, + ), + ) + self.down2 = nn.Sequential( + nn.MaxPool2d(kernel_size=2, stride=2), + ConvBnRelu( + in_channels=1, out_channels=1, kernel_size=5, stride=1, padding=2 + ), + ) + self.down3 = nn.Sequential( + nn.MaxPool2d(kernel_size=2, stride=2), + ConvBnRelu( + in_channels=1, out_channels=1, kernel_size=3, stride=1, padding=1 + ), + ConvBnRelu( + in_channels=1, out_channels=1, kernel_size=3, stride=1, padding=1 + ), + ) + self.conv2 = ConvBnRelu( + in_channels=1, out_channels=1, kernel_size=5, stride=1, padding=2 + ) + self.conv1 = ConvBnRelu( + in_channels=1, out_channels=1, kernel_size=7, stride=1, padding=3 + ) + + def forward(self, x): + h, w = x.size(2), x.size(3) + b1 = self.branch1(x) + upscale_parameters = dict( + mode=self.upscale_mode, align_corners=self.align_corners + ) + b1 = F.interpolate(b1, size=(h, w), **upscale_parameters) + + mid = self.mid(x) + x1 = self.down1(x) + x2 = self.down2(x1) + x3 = self.down3(x2) + x3 = F.interpolate(x3, size=(h // 4, w // 4), **upscale_parameters) + + x2 = self.conv2(x2) + x = x2 + x3 + x = F.interpolate(x, size=(h // 2, w // 2), **upscale_parameters) + + x1 = self.conv1(x1) + x = x + x1 + x = F.interpolate(x, size=(h, w), **upscale_parameters) + + x = torch.mul(x, mid) + x = x + b1 + return x + + +class GAUBlock(nn.Module): + def __init__( + self, in_channels: int, out_channels: int, upscale_mode: str = "bilinear" + ): + super(GAUBlock, self).__init__() + + self.upscale_mode = upscale_mode + self.align_corners = True if upscale_mode == "bilinear" else None + + self.conv1 = nn.Sequential( + nn.AdaptiveAvgPool2d(1), + ConvBnRelu( + in_channels=out_channels, + out_channels=out_channels, + kernel_size=1, + add_relu=False, + ), + nn.Sigmoid(), + ) + self.conv2 = ConvBnRelu( + in_channels=in_channels, out_channels=out_channels, kernel_size=3, padding=1 + ) + + def forward(self, x, y): + """ + Args: + x: low level feature + y: high level feature + """ + h, w = x.size(2), x.size(3) + y_up = F.interpolate( + y, size=(h, w), mode=self.upscale_mode, align_corners=self.align_corners + ) + x = self.conv2(x) + y = self.conv1(y) + z = torch.mul(x, y) + return y_up + z + + +class PANDecoder(nn.Module): + def __init__( + self, encoder_channels, decoder_channels, upscale_mode: str = "bilinear" + ): + super().__init__() + + self.fpa = FPABlock( + in_channels=encoder_channels[-1], out_channels=decoder_channels + ) + self.gau3 = GAUBlock( + in_channels=encoder_channels[-2], + out_channels=decoder_channels, + upscale_mode=upscale_mode, + ) + self.gau2 = GAUBlock( + in_channels=encoder_channels[-3], + out_channels=decoder_channels, + upscale_mode=upscale_mode, + ) + self.gau1 = GAUBlock( + in_channels=encoder_channels[-4], + out_channels=decoder_channels, + upscale_mode=upscale_mode, + ) + + def forward(self, *features): + bottleneck = features[-1] + x5 = self.fpa(bottleneck) # 1/32 + x4 = self.gau3(features[-2], x5) # 1/16 + x3 = self.gau2(features[-3], x4) # 1/8 + x2 = self.gau1(features[-4], x3) # 1/4 + + return x2 diff --git a/feature_extractor_models/decoders/pan/model.py b/feature_extractor_models/decoders/pan/model.py new file mode 100644 index 0000000000000000000000000000000000000000..ec3e1ad033eb9850e83e4024f1894a0b6434ad7a --- /dev/null +++ b/feature_extractor_models/decoders/pan/model.py @@ -0,0 +1,100 @@ +from typing import Optional, Union + +from feature_extractor_models.encoders import get_encoder +from 
feature_extractor_models.base import ( + SegmentationModel, + SegmentationHead, + ClassificationHead, +) +from .decoder import PANDecoder + + +class PAN(SegmentationModel): + """Implementation of PAN_ (Pyramid Attention Network). + + Note: + Currently works with shape of input tensor >= [B x C x 128 x 128] for pytorch <= 1.1.0 + and with shape of input tensor >= [B x C x 256 x 256] for pytorch == 1.3.1 + + Args: + encoder_name: Name of the classification model that will be used as an encoder (a.k.a backbone) + to extract features of different spatial resolution + encoder_weights: One of **None** (random initialization), **"imagenet"** (pre-training on ImageNet) and + other pretrained weights (see table with available weights for each encoder_name) + encoder_output_stride: 16 or 32, if 16 use dilation in encoder last layer. + Doesn't work with ***ception***, **vgg***, **densenet*`** backbones.Default is 16. + decoder_channels: A number of convolution layer filters in decoder blocks + in_channels: A number of input channels for the model, default is 3 (RGB images) + classes: A number of classes for output mask (or you can think as a number of channels of output mask) + activation: An activation function to apply after the final convolution layer. + Available options are **"sigmoid"**, **"softmax"**, **"logsoftmax"**, **"tanh"**, **"identity"**, + **callable** and **None**. + Default is **None** + upsampling: Final upsampling factor. Default is 4 to preserve input-output spatial shape identity + aux_params: Dictionary with parameters of the auxiliary output (classification head). Auxiliary output is build + on top of encoder if **aux_params** is not **None** (default). Supported params: + - classes (int): A number of classes + - pooling (str): One of "max", "avg". Default is "avg" + - dropout (float): Dropout factor in [0, 1) + - activation (str): An activation function to apply "sigmoid"/"softmax" + (could be **None** to return logits) + + Returns: + ``torch.nn.Module``: **PAN** + + .. 
_PAN: + https://arxiv.org/abs/1805.10180 + + """ + + def __init__( + self, + encoder_name: str = "resnet34", + encoder_weights: Optional[str] = "imagenet", + encoder_output_stride: int = 16, + decoder_channels: int = 32, + in_channels: int = 3, + classes: int = 1, + activation: Optional[Union[str, callable]] = None, + upsampling: int = 4, + aux_params: Optional[dict] = None, + ): + super().__init__() + + if encoder_output_stride not in [16, 32]: + raise ValueError( + "PAN support output stride 16 or 32, got {}".format( + encoder_output_stride + ) + ) + + self.encoder = get_encoder( + encoder_name, + in_channels=in_channels, + depth=5, + weights=encoder_weights, + output_stride=encoder_output_stride, + ) + + self.decoder = PANDecoder( + encoder_channels=self.encoder.out_channels, + decoder_channels=decoder_channels, + ) + + self.segmentation_head = SegmentationHead( + in_channels=decoder_channels, + out_channels=classes, + activation=activation, + kernel_size=3, + upsampling=upsampling, + ) + + if aux_params is not None: + self.classification_head = ClassificationHead( + in_channels=self.encoder.out_channels[-1], **aux_params + ) + else: + self.classification_head = None + + self.name = "pan-{}".format(encoder_name) + self.initialize() diff --git a/feature_extractor_models/decoders/pspnet/__init__.py b/feature_extractor_models/decoders/pspnet/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..7b8922eb35c50c4cedadd987a402189d3721bb65 --- /dev/null +++ b/feature_extractor_models/decoders/pspnet/__init__.py @@ -0,0 +1,3 @@ +from .model import PSPNet + +__all__ = ["PSPNet"] diff --git a/feature_extractor_models/decoders/pspnet/decoder.py b/feature_extractor_models/decoders/pspnet/decoder.py new file mode 100644 index 0000000000000000000000000000000000000000..eda408400512b97b78604640bc6be1801fdf5fa8 --- /dev/null +++ b/feature_extractor_models/decoders/pspnet/decoder.py @@ -0,0 +1,76 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +from feature_extractor_models.base import modules + + +class PSPBlock(nn.Module): + def __init__(self, in_channels, out_channels, pool_size, use_bathcnorm=True): + super().__init__() + if pool_size == 1: + use_bathcnorm = False # PyTorch does not support BatchNorm for 1x1 shape + self.pool = nn.Sequential( + nn.AdaptiveAvgPool2d(output_size=(pool_size, pool_size)), + modules.Conv2dReLU( + in_channels, out_channels, (1, 1), use_batchnorm=use_bathcnorm + ), + ) + + def forward(self, x): + h, w = x.size(2), x.size(3) + x = self.pool(x) + x = F.interpolate(x, size=(h, w), mode="bilinear", align_corners=True) + return x + + +class PSPModule(nn.Module): + def __init__(self, in_channels, sizes=(1, 2, 3, 6), use_bathcnorm=True): + super().__init__() + + self.blocks = nn.ModuleList( + [ + PSPBlock( + in_channels, + in_channels // len(sizes), + size, + use_bathcnorm=use_bathcnorm, + ) + for size in sizes + ] + ) + + def forward(self, x): + xs = [block(x) for block in self.blocks] + [x] + x = torch.cat(xs, dim=1) + return x + + +class PSPDecoder(nn.Module): + def __init__( + self, encoder_channels, use_batchnorm=True, out_channels=512, dropout=0.2 + ): + super().__init__() + + self.psp = PSPModule( + in_channels=encoder_channels[-1], + sizes=(1, 2, 3, 6), + use_bathcnorm=use_batchnorm, + ) + + self.conv = modules.Conv2dReLU( + in_channels=encoder_channels[-1] * 2, + out_channels=out_channels, + kernel_size=1, + use_batchnorm=use_batchnorm, + ) + + self.dropout = nn.Dropout2d(p=dropout) + + def forward(self, *features): + x 
= features[-1] + x = self.psp(x) + x = self.conv(x) + x = self.dropout(x) + + return x diff --git a/feature_extractor_models/decoders/pspnet/model.py b/feature_extractor_models/decoders/pspnet/model.py new file mode 100644 index 0000000000000000000000000000000000000000..1c5f39a53823c717d16cdf4ce77c9095d0d69af3 --- /dev/null +++ b/feature_extractor_models/decoders/pspnet/model.py @@ -0,0 +1,101 @@ +from typing import Optional, Union + +from feature_extractor_models.encoders import get_encoder +from feature_extractor_models.base import ( + SegmentationModel, + SegmentationHead, + ClassificationHead, +) +from .decoder import PSPDecoder + + +class PSPNet(SegmentationModel): + """PSPNet_ is a fully convolution neural network for image semantic segmentation. Consist of + *encoder* and *Spatial Pyramid* (decoder). Spatial Pyramid build on top of encoder and does not + use "fine-features" (features of high spatial resolution). PSPNet can be used for multiclass segmentation + of high resolution images, however it is not good for detecting small objects and producing accurate, + pixel-level mask. + + Args: + encoder_name: Name of the classification model that will be used as an encoder (a.k.a backbone) + to extract features of different spatial resolution + encoder_depth: A number of stages used in encoder in range [3, 5]. Each stage generate features + two times smaller in spatial dimensions than previous one (e.g. for depth 0 we will have features + with shapes [(N, C, H, W),], for depth 1 - [(N, C, H, W), (N, C, H // 2, W // 2)] and so on). + Default is 5 + encoder_weights: One of **None** (random initialization), **"imagenet"** (pre-training on ImageNet) and + other pretrained weights (see table with available weights for each encoder_name) + psp_out_channels: A number of filters in Spatial Pyramid + psp_use_batchnorm: If **True**, BatchNorm2d layer between Conv2D and Activation layers + is used. If **"inplace"** InplaceABN will be used, allows to decrease memory consumption. + Available options are **True, False, "inplace"** + psp_dropout: Spatial dropout rate in [0, 1) used in Spatial Pyramid + in_channels: A number of input channels for the model, default is 3 (RGB images) + classes: A number of classes for output mask (or you can think as a number of channels of output mask) + activation: An activation function to apply after the final convolution layer. + Available options are **"sigmoid"**, **"softmax"**, **"logsoftmax"**, **"tanh"**, **"identity"**, + **callable** and **None**. + Default is **None** + upsampling: Final upsampling factor. Default is 8 to preserve input-output spatial shape identity + aux_params: Dictionary with parameters of the auxiliary output (classification head). Auxiliary output is build + on top of encoder if **aux_params** is not **None** (default). Supported params: + - classes (int): A number of classes + - pooling (str): One of "max", "avg". Default is "avg" + - dropout (float): Dropout factor in [0, 1) + - activation (str): An activation function to apply "sigmoid"/"softmax" + (could be **None** to return logits) + + Returns: + ``torch.nn.Module``: **PSPNet** + + .. 
_PSPNet: + https://arxiv.org/abs/1612.01105 + """ + + def __init__( + self, + encoder_name: str = "resnet34", + encoder_weights: Optional[str] = "imagenet", + encoder_depth: int = 3, + psp_out_channels: int = 512, + psp_use_batchnorm: bool = True, + psp_dropout: float = 0.2, + in_channels: int = 3, + classes: int = 1, + activation: Optional[Union[str, callable]] = None, + upsampling: int = 8, + aux_params: Optional[dict] = None, + ): + super().__init__() + + self.encoder = get_encoder( + encoder_name, + in_channels=in_channels, + depth=encoder_depth, + weights=encoder_weights, + ) + + self.decoder = PSPDecoder( + encoder_channels=self.encoder.out_channels, + use_batchnorm=psp_use_batchnorm, + out_channels=psp_out_channels, + dropout=psp_dropout, + ) + + self.segmentation_head = SegmentationHead( + in_channels=psp_out_channels, + out_channels=classes, + kernel_size=3, + activation=activation, + upsampling=upsampling, + ) + + if aux_params: + self.classification_head = ClassificationHead( + in_channels=self.encoder.out_channels[-1], **aux_params + ) + else: + self.classification_head = None + + self.name = "psp-{}".format(encoder_name) + self.initialize() diff --git a/feature_extractor_models/decoders/unet/__init__.py b/feature_extractor_models/decoders/unet/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..bcae4e0fbb8647fcfe9f2bd481964bbbe9ef0290 --- /dev/null +++ b/feature_extractor_models/decoders/unet/__init__.py @@ -0,0 +1,3 @@ +from .model import Unet + +__all__ = ["Unet"] diff --git a/feature_extractor_models/decoders/unet/decoder.py b/feature_extractor_models/decoders/unet/decoder.py new file mode 100644 index 0000000000000000000000000000000000000000..313b208415095e3c9635b44f989958acb4a4db30 --- /dev/null +++ b/feature_extractor_models/decoders/unet/decoder.py @@ -0,0 +1,124 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +from feature_extractor_models.base import modules as md + + +class DecoderBlock(nn.Module): + def __init__( + self, + in_channels, + skip_channels, + out_channels, + use_batchnorm=True, + attention_type=None, + ): + super().__init__() + self.conv1 = md.Conv2dReLU( + in_channels + skip_channels, + out_channels, + kernel_size=3, + padding=1, + use_batchnorm=use_batchnorm, + ) + self.attention1 = md.Attention( + attention_type, in_channels=in_channels + skip_channels + ) + self.conv2 = md.Conv2dReLU( + out_channels, + out_channels, + kernel_size=3, + padding=1, + use_batchnorm=use_batchnorm, + ) + self.attention2 = md.Attention(attention_type, in_channels=out_channels) + + def forward(self, x, skip=None): + x = F.interpolate(x, scale_factor=2, mode="nearest") + if skip is not None: + x = torch.cat([x, skip], dim=1) + x = self.attention1(x) + x = self.conv1(x) + x = self.conv2(x) + x = self.attention2(x) + return x + + +class CenterBlock(nn.Sequential): + def __init__(self, in_channels, out_channels, use_batchnorm=True): + conv1 = md.Conv2dReLU( + in_channels, + out_channels, + kernel_size=3, + padding=1, + use_batchnorm=use_batchnorm, + ) + conv2 = md.Conv2dReLU( + out_channels, + out_channels, + kernel_size=3, + padding=1, + use_batchnorm=use_batchnorm, + ) + super().__init__(conv1, conv2) + + +class UnetDecoder(nn.Module): + def __init__( + self, + encoder_channels, + decoder_channels, + n_blocks=5, + use_batchnorm=True, + attention_type=None, + center=False, + ): + super().__init__() + + if n_blocks != len(decoder_channels): + raise ValueError( + "Model depth is {}, but you provide `decoder_channels` for {} 
blocks.".format( + n_blocks, len(decoder_channels) + ) + ) + + # remove first skip with same spatial resolution + encoder_channels = encoder_channels[1:] + # reverse channels to start from head of encoder + encoder_channels = encoder_channels[::-1] + + # computing blocks input and output channels + head_channels = encoder_channels[0] + in_channels = [head_channels] + list(decoder_channels[:-1]) + skip_channels = list(encoder_channels[1:]) + [0] + out_channels = decoder_channels + + if center: + self.center = CenterBlock( + head_channels, head_channels, use_batchnorm=use_batchnorm + ) + else: + self.center = nn.Identity() + + # combine decoder keyword arguments + kwargs = dict(use_batchnorm=use_batchnorm, attention_type=attention_type) + blocks = [ + DecoderBlock(in_ch, skip_ch, out_ch, **kwargs) + for in_ch, skip_ch, out_ch in zip(in_channels, skip_channels, out_channels) + ] + self.blocks = nn.ModuleList(blocks) + + def forward(self, *features): + features = features[1:] # remove first skip with same spatial resolution + features = features[::-1] # reverse channels to start from head of encoder + + head = features[0] + skips = features[1:] + + x = self.center(head) + for i, decoder_block in enumerate(self.blocks): + skip = skips[i] if i < len(skips) else None + x = decoder_block(x, skip) + + return x diff --git a/feature_extractor_models/decoders/unet/model.py b/feature_extractor_models/decoders/unet/model.py new file mode 100644 index 0000000000000000000000000000000000000000..228064ef4560342986df0d6cc60df53e379bd56d --- /dev/null +++ b/feature_extractor_models/decoders/unet/model.py @@ -0,0 +1,102 @@ +from typing import Optional, Union, List + +from feature_extractor_models.encoders import get_encoder +from feature_extractor_models.base import ( + SegmentationModel, + SegmentationHead, + ClassificationHead, +) +from .decoder import UnetDecoder + + +class Unet(SegmentationModel): + """Unet_ is a fully convolution neural network for image semantic segmentation. Consist of *encoder* + and *decoder* parts connected with *skip connections*. Encoder extract features of different spatial + resolution (skip connections) which are used by decoder to define accurate segmentation mask. Use *concatenation* + for fusing decoder blocks with skip connections. + + Args: + encoder_name: Name of the classification model that will be used as an encoder (a.k.a backbone) + to extract features of different spatial resolution + encoder_depth: A number of stages used in encoder in range [3, 5]. Each stage generate features + two times smaller in spatial dimensions than previous one (e.g. for depth 0 we will have features + with shapes [(N, C, H, W),], for depth 1 - [(N, C, H, W), (N, C, H // 2, W // 2)] and so on). + Default is 5 + encoder_weights: One of **None** (random initialization), **"imagenet"** (pre-training on ImageNet) and + other pretrained weights (see table with available weights for each encoder_name) + decoder_channels: List of integers which specify **in_channels** parameter for convolutions used in decoder. + Length of the list should be the same as **encoder_depth** + decoder_use_batchnorm: If **True**, BatchNorm2d layer between Conv2D and Activation layers + is used. If **"inplace"** InplaceABN will be used, allows to decrease memory consumption. + Available options are **True, False, "inplace"** + decoder_attention_type: Attention module used in decoder of the model. Available options are + **None** and **scse** (https://arxiv.org/abs/1808.08127). 
+ in_channels: A number of input channels for the model, default is 3 (RGB images) + classes: A number of classes for output mask (or you can think as a number of channels of output mask) + activation: An activation function to apply after the final convolution layer. + Available options are **"sigmoid"**, **"softmax"**, **"logsoftmax"**, **"tanh"**, **"identity"**, + **callable** and **None**. + Default is **None** + aux_params: Dictionary with parameters of the auxiliary output (classification head). Auxiliary output is build + on top of encoder if **aux_params** is not **None** (default). Supported params: + - classes (int): A number of classes + - pooling (str): One of "max", "avg". Default is "avg" + - dropout (float): Dropout factor in [0, 1) + - activation (str): An activation function to apply "sigmoid"/"softmax" + (could be **None** to return logits) + + Returns: + ``torch.nn.Module``: Unet + + .. _Unet: + https://arxiv.org/abs/1505.04597 + + """ + + def __init__( + self, + encoder_name: str = "resnet34", + encoder_depth: int = 5, + encoder_weights: Optional[str] = "imagenet", + decoder_use_batchnorm: bool = True, + decoder_channels: List[int] = (256, 128, 64, 32, 16), + decoder_attention_type: Optional[str] = None, + in_channels: int = 3, + classes: int = 1, + activation: Optional[Union[str, callable]] = None, + aux_params: Optional[dict] = None, + ): + super().__init__() + + self.encoder = get_encoder( + encoder_name, + in_channels=in_channels, + depth=encoder_depth, + weights=encoder_weights, + ) + + self.decoder = UnetDecoder( + encoder_channels=self.encoder.out_channels, + decoder_channels=decoder_channels, + n_blocks=encoder_depth, + use_batchnorm=decoder_use_batchnorm, + center=True if encoder_name.startswith("vgg") else False, + attention_type=decoder_attention_type, + ) + + self.segmentation_head = SegmentationHead( + in_channels=decoder_channels[-1], + out_channels=classes, + activation=activation, + kernel_size=3, + ) + + if aux_params is not None: + self.classification_head = ClassificationHead( + in_channels=self.encoder.out_channels[-1], **aux_params + ) + else: + self.classification_head = None + + self.name = "u-{}".format(encoder_name) + self.initialize() diff --git a/feature_extractor_models/decoders/unetplusplus/__init__.py b/feature_extractor_models/decoders/unetplusplus/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..fd30de4abe176233aaed9daa57be792aaee00293 --- /dev/null +++ b/feature_extractor_models/decoders/unetplusplus/__init__.py @@ -0,0 +1,3 @@ +from .model import UnetPlusPlus + +__all__ = ["UnetPlusPlus"] diff --git a/feature_extractor_models/decoders/unetplusplus/decoder.py b/feature_extractor_models/decoders/unetplusplus/decoder.py new file mode 100644 index 0000000000000000000000000000000000000000..551b48a63a8f24fd25cae4b7cacaceb89011862f --- /dev/null +++ b/feature_extractor_models/decoders/unetplusplus/decoder.py @@ -0,0 +1,155 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +from feature_extractor_models.base import modules as md + + +class DecoderBlock(nn.Module): + def __init__( + self, + in_channels, + skip_channels, + out_channels, + use_batchnorm=True, + attention_type=None, + ): + super().__init__() + self.conv1 = md.Conv2dReLU( + in_channels + skip_channels, + out_channels, + kernel_size=3, + padding=1, + use_batchnorm=use_batchnorm, + ) + self.attention1 = md.Attention( + attention_type, in_channels=in_channels + skip_channels + ) + self.conv2 = md.Conv2dReLU( + out_channels, + 
out_channels, + kernel_size=3, + padding=1, + use_batchnorm=use_batchnorm, + ) + self.attention2 = md.Attention(attention_type, in_channels=out_channels) + + def forward(self, x, skip=None): + x = F.interpolate(x, scale_factor=2, mode="nearest") + if skip is not None: + x = torch.cat([x, skip], dim=1) + x = self.attention1(x) + x = self.conv1(x) + x = self.conv2(x) + x = self.attention2(x) + return x + + +class CenterBlock(nn.Sequential): + def __init__(self, in_channels, out_channels, use_batchnorm=True): + conv1 = md.Conv2dReLU( + in_channels, + out_channels, + kernel_size=3, + padding=1, + use_batchnorm=use_batchnorm, + ) + conv2 = md.Conv2dReLU( + out_channels, + out_channels, + kernel_size=3, + padding=1, + use_batchnorm=use_batchnorm, + ) + super().__init__(conv1, conv2) + + +class UnetPlusPlusDecoder(nn.Module): + def __init__( + self, + encoder_channels, + decoder_channels, + n_blocks=5, + use_batchnorm=True, + attention_type=None, + center=False, + ): + super().__init__() + + if n_blocks != len(decoder_channels): + raise ValueError( + "Model depth is {}, but you provide `decoder_channels` for {} blocks.".format( + n_blocks, len(decoder_channels) + ) + ) + + # remove first skip with same spatial resolution + encoder_channels = encoder_channels[1:] + # reverse channels to start from head of encoder + encoder_channels = encoder_channels[::-1] + + # computing blocks input and output channels + head_channels = encoder_channels[0] + self.in_channels = [head_channels] + list(decoder_channels[:-1]) + self.skip_channels = list(encoder_channels[1:]) + [0] + self.out_channels = decoder_channels + if center: + self.center = CenterBlock( + head_channels, head_channels, use_batchnorm=use_batchnorm + ) + else: + self.center = nn.Identity() + + # combine decoder keyword arguments + kwargs = dict(use_batchnorm=use_batchnorm, attention_type=attention_type) + + blocks = {} + for layer_idx in range(len(self.in_channels) - 1): + for depth_idx in range(layer_idx + 1): + if depth_idx == 0: + in_ch = self.in_channels[layer_idx] + skip_ch = self.skip_channels[layer_idx] * (layer_idx + 1) + out_ch = self.out_channels[layer_idx] + else: + out_ch = self.skip_channels[layer_idx] + skip_ch = self.skip_channels[layer_idx] * ( + layer_idx + 1 - depth_idx + ) + in_ch = self.skip_channels[layer_idx - 1] + blocks[f"x_{depth_idx}_{layer_idx}"] = DecoderBlock( + in_ch, skip_ch, out_ch, **kwargs + ) + blocks[f"x_{0}_{len(self.in_channels)-1}"] = DecoderBlock( + self.in_channels[-1], 0, self.out_channels[-1], **kwargs + ) + self.blocks = nn.ModuleDict(blocks) + self.depth = len(self.in_channels) - 1 + + def forward(self, *features): + features = features[1:] # remove first skip with same spatial resolution + features = features[::-1] # reverse channels to start from head of encoder + # start building dense connections + dense_x = {} + for layer_idx in range(len(self.in_channels) - 1): + for depth_idx in range(self.depth - layer_idx): + if layer_idx == 0: + output = self.blocks[f"x_{depth_idx}_{depth_idx}"]( + features[depth_idx], features[depth_idx + 1] + ) + dense_x[f"x_{depth_idx}_{depth_idx}"] = output + else: + dense_l_i = depth_idx + layer_idx + cat_features = [ + dense_x[f"x_{idx}_{dense_l_i}"] + for idx in range(depth_idx + 1, dense_l_i + 1) + ] + cat_features = torch.cat( + cat_features + [features[dense_l_i + 1]], dim=1 + ) + dense_x[f"x_{depth_idx}_{dense_l_i}"] = self.blocks[ + f"x_{depth_idx}_{dense_l_i}" + ](dense_x[f"x_{depth_idx}_{dense_l_i-1}"], cat_features) + dense_x[f"x_{0}_{self.depth}"] = 
self.blocks[f"x_{0}_{self.depth}"]( + dense_x[f"x_{0}_{self.depth-1}"] + ) + return dense_x[f"x_{0}_{self.depth}"] diff --git a/feature_extractor_models/decoders/unetplusplus/model.py b/feature_extractor_models/decoders/unetplusplus/model.py new file mode 100644 index 0000000000000000000000000000000000000000..319881342838454c7df18f6844bdb903e8ffc9e6 --- /dev/null +++ b/feature_extractor_models/decoders/unetplusplus/model.py @@ -0,0 +1,107 @@ +from typing import Optional, Union, List + +from feature_extractor_models.encoders import get_encoder +from feature_extractor_models.base import ( + SegmentationModel, + SegmentationHead, + ClassificationHead, +) +from .decoder import UnetPlusPlusDecoder + + +class UnetPlusPlus(SegmentationModel): + """Unet++ is a fully convolution neural network for image semantic segmentation. Consist of *encoder* + and *decoder* parts connected with *skip connections*. Encoder extract features of different spatial + resolution (skip connections) which are used by decoder to define accurate segmentation mask. Decoder of + Unet++ is more complex than in usual Unet. + + Args: + encoder_name: Name of the classification model that will be used as an encoder (a.k.a backbone) + to extract features of different spatial resolution + encoder_depth: A number of stages used in encoder in range [3, 5]. Each stage generate features + two times smaller in spatial dimensions than previous one (e.g. for depth 0 we will have features + with shapes [(N, C, H, W),], for depth 1 - [(N, C, H, W), (N, C, H // 2, W // 2)] and so on). + Default is 5 + encoder_weights: One of **None** (random initialization), **"imagenet"** (pre-training on ImageNet) and + other pretrained weights (see table with available weights for each encoder_name) + decoder_channels: List of integers which specify **in_channels** parameter for convolutions used in decoder. + Length of the list should be the same as **encoder_depth** + decoder_use_batchnorm: If **True**, BatchNorm2d layer between Conv2D and Activation layers + is used. If **"inplace"** InplaceABN will be used, allows to decrease memory consumption. + Available options are **True, False, "inplace"** + decoder_attention_type: Attention module used in decoder of the model. + Available options are **None** and **scse** (https://arxiv.org/abs/1808.08127). + in_channels: A number of input channels for the model, default is 3 (RGB images) + classes: A number of classes for output mask (or you can think as a number of channels of output mask) + activation: An activation function to apply after the final convolution layer. + Available options are **"sigmoid"**, **"softmax"**, **"logsoftmax"**, **"tanh"**, **"identity"**, + **callable** and **None**. + Default is **None** + aux_params: Dictionary with parameters of the auxiliary output (classification head). Auxiliary output is build + on top of encoder if **aux_params** is not **None** (default). Supported params: + - classes (int): A number of classes + - pooling (str): One of "max", "avg". 
Default is "avg" + - dropout (float): Dropout factor in [0, 1) + - activation (str): An activation function to apply "sigmoid"/"softmax" + (could be **None** to return logits) + + Returns: + ``torch.nn.Module``: **Unet++** + + Reference: + https://arxiv.org/abs/1807.10165 + + """ + + def __init__( + self, + encoder_name: str = "resnet34", + encoder_depth: int = 5, + encoder_weights: Optional[str] = "imagenet", + decoder_use_batchnorm: bool = True, + decoder_channels: List[int] = (256, 128, 64, 32, 16), + decoder_attention_type: Optional[str] = None, + in_channels: int = 3, + classes: int = 1, + activation: Optional[Union[str, callable]] = None, + aux_params: Optional[dict] = None, + ): + super().__init__() + + if encoder_name.startswith("mit_b"): + raise ValueError( + "UnetPlusPlus is not support encoder_name={}".format(encoder_name) + ) + + self.encoder = get_encoder( + encoder_name, + in_channels=in_channels, + depth=encoder_depth, + weights=encoder_weights, + ) + + self.decoder = UnetPlusPlusDecoder( + encoder_channels=self.encoder.out_channels, + decoder_channels=decoder_channels, + n_blocks=encoder_depth, + use_batchnorm=decoder_use_batchnorm, + center=True if encoder_name.startswith("vgg") else False, + attention_type=decoder_attention_type, + ) + + self.segmentation_head = SegmentationHead( + in_channels=decoder_channels[-1], + out_channels=classes, + activation=activation, + kernel_size=3, + ) + + if aux_params is not None: + self.classification_head = ClassificationHead( + in_channels=self.encoder.out_channels[-1], **aux_params + ) + else: + self.classification_head = None + + self.name = "unetplusplus-{}".format(encoder_name) + self.initialize() diff --git a/feature_extractor_models/encoders/__init__.py b/feature_extractor_models/encoders/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..c4a4c037537f8bc51a02f377d0c238ba4c3cd6f8 --- /dev/null +++ b/feature_extractor_models/encoders/__init__.py @@ -0,0 +1,126 @@ +import timm +import functools +import torch.utils.model_zoo as model_zoo + +from .resnet import resnet_encoders +from .dpn import dpn_encoders +from .vgg import vgg_encoders +from .senet import senet_encoders +from .densenet import densenet_encoders +from .inceptionresnetv2 import inceptionresnetv2_encoders +from .inceptionv4 import inceptionv4_encoders +from .efficientnet import efficient_net_encoders +from .mobilenet import mobilenet_encoders +from .xception import xception_encoders +from .timm_efficientnet import timm_efficientnet_encoders +from .timm_resnest import timm_resnest_encoders +from .timm_res2net import timm_res2net_encoders +from .timm_regnet import timm_regnet_encoders +from .timm_sknet import timm_sknet_encoders +from .timm_mobilenetv3 import timm_mobilenetv3_encoders +from .timm_gernet import timm_gernet_encoders +from .mix_transformer import mix_transformer_encoders +from .mobileone import mobileone_encoders + +from .timm_universal import TimmUniversalEncoder + +from ._preprocessing import preprocess_input + +encoders = {} +encoders.update(resnet_encoders) +encoders.update(dpn_encoders) +encoders.update(vgg_encoders) +encoders.update(senet_encoders) +encoders.update(densenet_encoders) +encoders.update(inceptionresnetv2_encoders) +encoders.update(inceptionv4_encoders) +encoders.update(efficient_net_encoders) +encoders.update(mobilenet_encoders) +encoders.update(xception_encoders) +encoders.update(timm_efficientnet_encoders) +encoders.update(timm_resnest_encoders) +encoders.update(timm_res2net_encoders) 
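# Usage note (illustrative only, not part of this patch): the registry assembled by
# these `encoders.update(...)` calls is what `get_encoder`, defined further down in
# this file, consults to resolve a backbone by name. Assuming the `resnet34` entry
# from `.resnet` matches upstream segmentation_models_pytorch:
#
#     >>> from feature_extractor_models.encoders import get_encoder
#     >>> encoder = get_encoder("resnet34", in_channels=3, depth=5, weights=None)
#     >>> encoder.out_channels  # consumed by the decoders above as `encoder_channels`
#     (3, 64, 64, 128, 256, 512)
#
# `weights=None` keeps random initialization (nothing is downloaded); a string such
# as "imagenet" selects the matching entry in that encoder's `pretrained_settings`.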
+encoders.update(timm_regnet_encoders) +encoders.update(timm_sknet_encoders) +encoders.update(timm_mobilenetv3_encoders) +encoders.update(timm_gernet_encoders) +encoders.update(mix_transformer_encoders) +encoders.update(mobileone_encoders) + + +def get_encoder(name, in_channels=3, depth=5, weights=None, output_stride=32, **kwargs): + if name.startswith("tu-"): + name = name[3:] + encoder = TimmUniversalEncoder( + name=name, + in_channels=in_channels, + depth=depth, + output_stride=output_stride, + pretrained=weights is not None, + **kwargs, + ) + return encoder + + try: + Encoder = encoders[name]["encoder"] + except KeyError: + raise KeyError( + "Wrong encoder name `{}`, supported encoders: {}".format( + name, list(encoders.keys()) + ) + ) + + params = encoders[name]["params"] + params.update(depth=depth) + encoder = Encoder(**params) + + if weights is not None: + try: + settings = encoders[name]["pretrained_settings"][weights] + except KeyError: + raise KeyError( + "Wrong pretrained weights `{}` for encoder `{}`. Available options are: {}".format( + weights, name, list(encoders[name]["pretrained_settings"].keys()) + ) + ) + encoder.load_state_dict(model_zoo.load_url(settings["url"])) + + encoder.set_in_channels(in_channels, pretrained=weights is not None) + if output_stride != 32: + encoder.make_dilated(output_stride) + + return encoder + + +def get_encoder_names(): + return list(encoders.keys()) + + +def get_preprocessing_params(encoder_name, pretrained="imagenet"): + if encoder_name.startswith("tu-"): + encoder_name = encoder_name[3:] + if not timm.models.is_model_pretrained(encoder_name): + raise ValueError( + f"{encoder_name} does not have pretrained weights and preprocessing parameters" + ) + settings = timm.models.get_pretrained_cfg(encoder_name).__dict__ + else: + all_settings = encoders[encoder_name]["pretrained_settings"] + if pretrained not in all_settings.keys(): + raise ValueError( + "Available pretrained options {}".format(all_settings.keys()) + ) + settings = all_settings[pretrained] + + formatted_settings = {} + formatted_settings["input_space"] = settings.get("input_space", "RGB") + formatted_settings["input_range"] = list(settings.get("input_range", [0, 1])) + formatted_settings["mean"] = list(settings["mean"]) + formatted_settings["std"] = list(settings["std"]) + + return formatted_settings + + +def get_preprocessing_fn(encoder_name, pretrained="imagenet"): + params = get_preprocessing_params(encoder_name, pretrained=pretrained) + return functools.partial(preprocess_input, **params) diff --git a/feature_extractor_models/encoders/_base.py b/feature_extractor_models/encoders/_base.py new file mode 100644 index 0000000000000000000000000000000000000000..3b877075394a30206250ccd26e6c2305130fbf05 --- /dev/null +++ b/feature_extractor_models/encoders/_base.py @@ -0,0 +1,58 @@ +from . 
import _utils as utils + + +class EncoderMixin: + """Add encoder functionality such as: + - output channels specification of feature tensors (produced by encoder) + - patching first convolution for arbitrary input channels + """ + + _output_stride = 32 + + @property + def out_channels(self): + """Return channels dimensions for each tensor of forward output of encoder""" + return self._out_channels[: self._depth + 1] + + @property + def output_stride(self): + return min(self._output_stride, 2**self._depth) + + def set_in_channels(self, in_channels, pretrained=True): + """Change first convolution channels""" + if in_channels == 3: + return + + self._in_channels = in_channels + if self._out_channels[0] == 3: + self._out_channels = tuple([in_channels] + list(self._out_channels)[1:]) + + utils.patch_first_conv( + model=self, new_in_channels=in_channels, pretrained=pretrained + ) + + def get_stages(self): + """Override it in your implementation""" + raise NotImplementedError + + def make_dilated(self, output_stride): + if output_stride == 16: + stage_list = [5] + dilation_list = [2] + + elif output_stride == 8: + stage_list = [4, 5] + dilation_list = [2, 4] + + else: + raise ValueError( + "Output stride should be 16 or 8, got {}.".format(output_stride) + ) + + self._output_stride = output_stride + + stages = self.get_stages() + for stage_indx, dilation_rate in zip(stage_list, dilation_list): + utils.replace_strides_with_dilation( + module=stages[stage_indx], dilation_rate=dilation_rate + ) diff --git a/feature_extractor_models/encoders/_preprocessing.py b/feature_extractor_models/encoders/_preprocessing.py new file mode 100644 index 0000000000000000000000000000000000000000..57c7e6d2d566aca14c963adda7ee318853419d9d --- /dev/null +++ b/feature_extractor_models/encoders/_preprocessing.py @@ -0,0 +1,22 @@ +import numpy as np + + +def preprocess_input( + x, mean=None, std=None, input_space="RGB", input_range=None, **kwargs +): + if input_space == "BGR": + x = x[..., ::-1].copy() + + if input_range is not None: + if x.max() > 1 and input_range[1] == 1: + x = x / 255.0 + + if mean is not None: + mean = np.array(mean) + x = x - mean + + if std is not None: + std = np.array(std) + x = x / std + + return x diff --git a/feature_extractor_models/encoders/_utils.py b/feature_extractor_models/encoders/_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..86128099a550986ca51a3ff263748b4e76a46911 --- /dev/null +++ b/feature_extractor_models/encoders/_utils.py @@ -0,0 +1,57 @@ +import torch +import torch.nn as nn + + +def patch_first_conv(model, new_in_channels, default_in_channels=3, pretrained=True): + """Change first convolution layer input channels. 
+ In case: + in_channels == 1 or in_channels == 2 -> reuse original weights + in_channels > 3 -> make random kaiming normal initialization + """ + + # get first conv + for module in model.modules(): + if isinstance(module, nn.Conv2d) and module.in_channels == default_in_channels: + break + + weight = module.weight.detach() + module.in_channels = new_in_channels + + if not pretrained: + module.weight = nn.parameter.Parameter( + torch.Tensor( + module.out_channels, + new_in_channels // module.groups, + *module.kernel_size, + ) + ) + module.reset_parameters() + + elif new_in_channels == 1: + new_weight = weight.sum(1, keepdim=True) + module.weight = nn.parameter.Parameter(new_weight) + + else: + new_weight = torch.Tensor( + module.out_channels, new_in_channels // module.groups, *module.kernel_size + ) + + for i in range(new_in_channels): + new_weight[:, i] = weight[:, i % default_in_channels] + + new_weight = new_weight * (default_in_channels / new_in_channels) + module.weight = nn.parameter.Parameter(new_weight) + + +def replace_strides_with_dilation(module, dilation_rate): + """Patch Conv2d modules replacing strides with dilation""" + for mod in module.modules(): + if isinstance(mod, nn.Conv2d): + mod.stride = (1, 1) + mod.dilation = (dilation_rate, dilation_rate) + kh, kw = mod.kernel_size + mod.padding = ((kh // 2) * dilation_rate, (kh // 2) * dilation_rate) + + # Kostyl for EfficientNet + if hasattr(mod, "static_padding"): + mod.static_padding = nn.Identity() diff --git a/feature_extractor_models/encoders/densenet.py b/feature_extractor_models/encoders/densenet.py new file mode 100644 index 0000000000000000000000000000000000000000..c4bd0ce22c0f084c87c8ae8e8bc57f1fac726448 --- /dev/null +++ b/feature_extractor_models/encoders/densenet.py @@ -0,0 +1,155 @@ +"""Each encoder should have following attributes and methods and be inherited from `_base.EncoderMixin` + +Attributes: + + _out_channels (list of int): specify number of channels for each encoder feature tensor + _depth (int): specify number of stages in decoder (in other words number of downsampling operations) + _in_channels (int): default number of input channels in first Conv2d layer for encoder (usually 3) + +Methods: + + forward(self, x: torch.Tensor) + produce list of features of different spatial resolutions, each feature is a 4D torch.tensor of + shape NCHW (features should be sorted in descending order according to spatial resolution, starting + with resolution same as input `x` tensor). + + Input: `x` with shape (1, 3, 64, 64) + Output: [f0, f1, f2, f3, f4, f5] - features with corresponding shapes + [(1, 3, 64, 64), (1, 64, 32, 32), (1, 128, 16, 16), (1, 256, 8, 8), + (1, 512, 4, 4), (1, 1024, 2, 2)] (C - dim may differ) + + also should support number of features according to specified depth, e.g. if depth = 5, + number of feature tensors = 6 (one with same resolution as input and 5 downsampled), + depth = 3 -> number of feature tensors = 4 (one with same resolution as input and 3 downsampled). 
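# Illustrative sketch (not part of this patch): a toy encoder that satisfies the
# contract spelled out in the docstring above -- `_out_channels`, `_depth`,
# `_in_channels`, and a `forward` returning one feature map per stage, each half
# the spatial resolution of the previous one. `ToyEncoder` and its channel sizes
# are hypothetical and assume the package and its dependencies are importable;
# the DenseNetEncoder below is a real implementation of the same contract on top
# of torchvision's DenseNet.

import torch
import torch.nn as nn

from feature_extractor_models.encoders._base import EncoderMixin


class ToyEncoder(nn.Module, EncoderMixin):
    def __init__(self, depth=5):
        super().__init__()
        self._depth = depth
        self._in_channels = 3
        self._out_channels = (3, 16, 32, 64, 128, 256)
        # stage 0 keeps the input resolution; every later stage downsamples by 2
        self._stages = nn.ModuleList(
            [nn.Identity()]
            + [
                nn.Sequential(
                    nn.Conv2d(
                        self._out_channels[i],
                        self._out_channels[i + 1],
                        kernel_size=3,
                        stride=2,
                        padding=1,
                    ),
                    nn.ReLU(inplace=True),
                )
                for i in range(depth)
            ]
        )

    def get_stages(self):
        return list(self._stages)

    def forward(self, x):
        features = []
        for stage in self.get_stages()[: self._depth + 1]:
            x = stage(x)
            features.append(x)
        return features


# For a (1, 3, 64, 64) input this yields six feature maps with shapes
# (1, 3, 64, 64), (1, 16, 32, 32), (1, 32, 16, 16), (1, 64, 8, 8),
# (1, 128, 4, 4), (1, 256, 2, 2), matching the shape pattern in the docstring.
print([tuple(f.shape) for f in ToyEncoder()(torch.randn(1, 3, 64, 64))])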
+""" + +import re +import torch.nn as nn + +from pretrainedmodels.models.torchvision_models import pretrained_settings +from torchvision.models.densenet import DenseNet + +from ._base import EncoderMixin + + +class TransitionWithSkip(nn.Module): + def __init__(self, module): + super().__init__() + self.module = module + + def forward(self, x): + for module in self.module: + x = module(x) + if isinstance(module, nn.ReLU): + skip = x + return x, skip + + +class DenseNetEncoder(DenseNet, EncoderMixin): + def __init__(self, out_channels, depth=5, **kwargs): + super().__init__(**kwargs) + self._out_channels = out_channels + self._depth = depth + self._in_channels = 3 + del self.classifier + + def make_dilated(self, *args, **kwargs): + raise ValueError( + "DenseNet encoders do not support dilated mode " + "due to pooling operation for downsampling!" + ) + + def get_stages(self): + return [ + nn.Identity(), + nn.Sequential( + self.features.conv0, self.features.norm0, self.features.relu0 + ), + nn.Sequential( + self.features.pool0, + self.features.denseblock1, + TransitionWithSkip(self.features.transition1), + ), + nn.Sequential( + self.features.denseblock2, TransitionWithSkip(self.features.transition2) + ), + nn.Sequential( + self.features.denseblock3, TransitionWithSkip(self.features.transition3) + ), + nn.Sequential(self.features.denseblock4, self.features.norm5), + ] + + def forward(self, x): + stages = self.get_stages() + + features = [] + for i in range(self._depth + 1): + x = stages[i](x) + if isinstance(x, (list, tuple)): + x, skip = x + features.append(skip) + else: + features.append(x) + + return features + + def load_state_dict(self, state_dict): + pattern = re.compile( + r"^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\.(?:weight|bias|running_mean|running_var))$" + ) + for key in list(state_dict.keys()): + res = pattern.match(key) + if res: + new_key = res.group(1) + res.group(2) + state_dict[new_key] = state_dict[key] + del state_dict[key] + + # remove linear + state_dict.pop("classifier.bias", None) + state_dict.pop("classifier.weight", None) + + super().load_state_dict(state_dict) + + +densenet_encoders = { + "densenet121": { + "encoder": DenseNetEncoder, + "pretrained_settings": pretrained_settings["densenet121"], + "params": { + "out_channels": (3, 64, 256, 512, 1024, 1024), + "num_init_features": 64, + "growth_rate": 32, + "block_config": (6, 12, 24, 16), + }, + }, + "densenet169": { + "encoder": DenseNetEncoder, + "pretrained_settings": pretrained_settings["densenet169"], + "params": { + "out_channels": (3, 64, 256, 512, 1280, 1664), + "num_init_features": 64, + "growth_rate": 32, + "block_config": (6, 12, 32, 32), + }, + }, + "densenet201": { + "encoder": DenseNetEncoder, + "pretrained_settings": pretrained_settings["densenet201"], + "params": { + "out_channels": (3, 64, 256, 512, 1792, 1920), + "num_init_features": 64, + "growth_rate": 32, + "block_config": (6, 12, 48, 32), + }, + }, + "densenet161": { + "encoder": DenseNetEncoder, + "pretrained_settings": pretrained_settings["densenet161"], + "params": { + "out_channels": (3, 96, 384, 768, 2112, 2208), + "num_init_features": 96, + "growth_rate": 48, + "block_config": (6, 12, 36, 24), + }, + }, +} diff --git a/feature_extractor_models/encoders/dpn.py b/feature_extractor_models/encoders/dpn.py new file mode 100644 index 0000000000000000000000000000000000000000..220c66de77985a048f88d0825467d520500ca076 --- /dev/null +++ b/feature_extractor_models/encoders/dpn.py @@ -0,0 +1,173 @@ +"""Each encoder should have following 
attributes and methods and be inherited from `_base.EncoderMixin` + +Attributes: + + _out_channels (list of int): specify number of channels for each encoder feature tensor + _depth (int): specify number of stages in decoder (in other words number of downsampling operations) + _in_channels (int): default number of input channels in first Conv2d layer for encoder (usually 3) + +Methods: + + forward(self, x: torch.Tensor) + produce list of features of different spatial resolutions, each feature is a 4D torch.tensor of + shape NCHW (features should be sorted in descending order according to spatial resolution, starting + with resolution same as input `x` tensor). + + Input: `x` with shape (1, 3, 64, 64) + Output: [f0, f1, f2, f3, f4, f5] - features with corresponding shapes + [(1, 3, 64, 64), (1, 64, 32, 32), (1, 128, 16, 16), (1, 256, 8, 8), + (1, 512, 4, 4), (1, 1024, 2, 2)] (C - dim may differ) + + also should support number of features according to specified depth, e.g. if depth = 5, + number of feature tensors = 6 (one with same resolution as input and 5 downsampled), + depth = 3 -> number of feature tensors = 4 (one with same resolution as input and 3 downsampled). +""" + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from pretrainedmodels.models.dpn import DPN +from pretrainedmodels.models.dpn import pretrained_settings + +from ._base import EncoderMixin + + +class DPNEncoder(DPN, EncoderMixin): + def __init__(self, stage_idxs, out_channels, depth=5, **kwargs): + super().__init__(**kwargs) + self._stage_idxs = stage_idxs + self._depth = depth + self._out_channels = out_channels + self._in_channels = 3 + + del self.last_linear + + def get_stages(self): + return [ + nn.Identity(), + nn.Sequential( + self.features[0].conv, self.features[0].bn, self.features[0].act + ), + nn.Sequential( + self.features[0].pool, self.features[1 : self._stage_idxs[0]] + ), + self.features[self._stage_idxs[0] : self._stage_idxs[1]], + self.features[self._stage_idxs[1] : self._stage_idxs[2]], + self.features[self._stage_idxs[2] : self._stage_idxs[3]], + ] + + def forward(self, x): + stages = self.get_stages() + + features = [] + for i in range(self._depth + 1): + x = stages[i](x) + if isinstance(x, (list, tuple)): + features.append(F.relu(torch.cat(x, dim=1), inplace=True)) + else: + features.append(x) + + return features + + def load_state_dict(self, state_dict, **kwargs): + state_dict.pop("last_linear.bias", None) + state_dict.pop("last_linear.weight", None) + super().load_state_dict(state_dict, **kwargs) + + +dpn_encoders = { + "dpn68": { + "encoder": DPNEncoder, + "pretrained_settings": pretrained_settings["dpn68"], + "params": { + "stage_idxs": (4, 8, 20, 24), + "out_channels": (3, 10, 144, 320, 704, 832), + "groups": 32, + "inc_sec": (16, 32, 32, 64), + "k_r": 128, + "k_sec": (3, 4, 12, 3), + "num_classes": 1000, + "num_init_features": 10, + "small": True, + "test_time_pool": True, + }, + }, + "dpn68b": { + "encoder": DPNEncoder, + "pretrained_settings": pretrained_settings["dpn68b"], + "params": { + "stage_idxs": (4, 8, 20, 24), + "out_channels": (3, 10, 144, 320, 704, 832), + "b": True, + "groups": 32, + "inc_sec": (16, 32, 32, 64), + "k_r": 128, + "k_sec": (3, 4, 12, 3), + "num_classes": 1000, + "num_init_features": 10, + "small": True, + "test_time_pool": True, + }, + }, + "dpn92": { + "encoder": DPNEncoder, + "pretrained_settings": pretrained_settings["dpn92"], + "params": { + "stage_idxs": (4, 8, 28, 32), + "out_channels": (3, 64, 336, 704, 1552, 2688), + "groups": 32, + 
"inc_sec": (16, 32, 24, 128), + "k_r": 96, + "k_sec": (3, 4, 20, 3), + "num_classes": 1000, + "num_init_features": 64, + "test_time_pool": True, + }, + }, + "dpn98": { + "encoder": DPNEncoder, + "pretrained_settings": pretrained_settings["dpn98"], + "params": { + "stage_idxs": (4, 10, 30, 34), + "out_channels": (3, 96, 336, 768, 1728, 2688), + "groups": 40, + "inc_sec": (16, 32, 32, 128), + "k_r": 160, + "k_sec": (3, 6, 20, 3), + "num_classes": 1000, + "num_init_features": 96, + "test_time_pool": True, + }, + }, + "dpn107": { + "encoder": DPNEncoder, + "pretrained_settings": pretrained_settings["dpn107"], + "params": { + "stage_idxs": (5, 13, 33, 37), + "out_channels": (3, 128, 376, 1152, 2432, 2688), + "groups": 50, + "inc_sec": (20, 64, 64, 128), + "k_r": 200, + "k_sec": (4, 8, 20, 3), + "num_classes": 1000, + "num_init_features": 128, + "test_time_pool": True, + }, + }, + "dpn131": { + "encoder": DPNEncoder, + "pretrained_settings": pretrained_settings["dpn131"], + "params": { + "stage_idxs": (5, 13, 41, 45), + "out_channels": (3, 128, 352, 832, 1984, 2688), + "groups": 40, + "inc_sec": (16, 32, 32, 128), + "k_r": 160, + "k_sec": (4, 8, 28, 3), + "num_classes": 1000, + "num_init_features": 128, + "test_time_pool": True, + }, + }, +} diff --git a/feature_extractor_models/encoders/efficientnet.py b/feature_extractor_models/encoders/efficientnet.py new file mode 100644 index 0000000000000000000000000000000000000000..4a7af6b4498ba910f56daf471fb8448d137f1ffd --- /dev/null +++ b/feature_extractor_models/encoders/efficientnet.py @@ -0,0 +1,177 @@ +"""Each encoder should have following attributes and methods and be inherited from `_base.EncoderMixin` + +Attributes: + + _out_channels (list of int): specify number of channels for each encoder feature tensor + _depth (int): specify number of stages in decoder (in other words number of downsampling operations) + _in_channels (int): default number of input channels in first Conv2d layer for encoder (usually 3) + +Methods: + + forward(self, x: torch.Tensor) + produce list of features of different spatial resolutions, each feature is a 4D torch.tensor of + shape NCHW (features should be sorted in descending order according to spatial resolution, starting + with resolution same as input `x` tensor). + + Input: `x` with shape (1, 3, 64, 64) + Output: [f0, f1, f2, f3, f4, f5] - features with corresponding shapes + [(1, 3, 64, 64), (1, 64, 32, 32), (1, 128, 16, 16), (1, 256, 8, 8), + (1, 512, 4, 4), (1, 1024, 2, 2)] (C - dim may differ) + + also should support number of features according to specified depth, e.g. if depth = 5, + number of feature tensors = 6 (one with same resolution as input and 5 downsampled), + depth = 3 -> number of feature tensors = 4 (one with same resolution as input and 3 downsampled). 
+""" + +import torch.nn as nn +from efficientnet_pytorch import EfficientNet +from efficientnet_pytorch.utils import url_map, url_map_advprop, get_model_params + +from ._base import EncoderMixin + + +class EfficientNetEncoder(EfficientNet, EncoderMixin): + def __init__(self, stage_idxs, out_channels, model_name, depth=5): + blocks_args, global_params = get_model_params(model_name, override_params=None) + super().__init__(blocks_args, global_params) + + self._stage_idxs = stage_idxs + self._out_channels = out_channels + self._depth = depth + self._in_channels = 3 + + del self._fc + + def get_stages(self): + return [ + nn.Identity(), + nn.Sequential(self._conv_stem, self._bn0, self._swish), + self._blocks[: self._stage_idxs[0]], + self._blocks[self._stage_idxs[0] : self._stage_idxs[1]], + self._blocks[self._stage_idxs[1] : self._stage_idxs[2]], + self._blocks[self._stage_idxs[2] :], + ] + + def forward(self, x): + stages = self.get_stages() + + block_number = 0.0 + drop_connect_rate = self._global_params.drop_connect_rate + + features = [] + for i in range(self._depth + 1): + # Identity and Sequential stages + if i < 2: + x = stages[i](x) + + # Block stages need drop_connect rate + else: + for module in stages[i]: + drop_connect = drop_connect_rate * block_number / len(self._blocks) + block_number += 1.0 + x = module(x, drop_connect) + + features.append(x) + + return features + + def load_state_dict(self, state_dict, **kwargs): + state_dict.pop("_fc.bias", None) + state_dict.pop("_fc.weight", None) + super().load_state_dict(state_dict, **kwargs) + + +def _get_pretrained_settings(encoder): + pretrained_settings = { + "imagenet": { + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "url": url_map[encoder], + "input_space": "RGB", + "input_range": [0, 1], + }, + "advprop": { + "mean": [0.5, 0.5, 0.5], + "std": [0.5, 0.5, 0.5], + "url": url_map_advprop[encoder], + "input_space": "RGB", + "input_range": [0, 1], + }, + } + return pretrained_settings + + +efficient_net_encoders = { + "efficientnet-b0": { + "encoder": EfficientNetEncoder, + "pretrained_settings": _get_pretrained_settings("efficientnet-b0"), + "params": { + "out_channels": (3, 32, 24, 40, 112, 320), + "stage_idxs": (3, 5, 9, 16), + "model_name": "efficientnet-b0", + }, + }, + "efficientnet-b1": { + "encoder": EfficientNetEncoder, + "pretrained_settings": _get_pretrained_settings("efficientnet-b1"), + "params": { + "out_channels": (3, 32, 24, 40, 112, 320), + "stage_idxs": (5, 8, 16, 23), + "model_name": "efficientnet-b1", + }, + }, + "efficientnet-b2": { + "encoder": EfficientNetEncoder, + "pretrained_settings": _get_pretrained_settings("efficientnet-b2"), + "params": { + "out_channels": (3, 32, 24, 48, 120, 352), + "stage_idxs": (5, 8, 16, 23), + "model_name": "efficientnet-b2", + }, + }, + "efficientnet-b3": { + "encoder": EfficientNetEncoder, + "pretrained_settings": _get_pretrained_settings("efficientnet-b3"), + "params": { + "out_channels": (3, 40, 32, 48, 136, 384), + "stage_idxs": (5, 8, 18, 26), + "model_name": "efficientnet-b3", + }, + }, + "efficientnet-b4": { + "encoder": EfficientNetEncoder, + "pretrained_settings": _get_pretrained_settings("efficientnet-b4"), + "params": { + "out_channels": (3, 48, 32, 56, 160, 448), + "stage_idxs": (6, 10, 22, 32), + "model_name": "efficientnet-b4", + }, + }, + "efficientnet-b5": { + "encoder": EfficientNetEncoder, + "pretrained_settings": _get_pretrained_settings("efficientnet-b5"), + "params": { + "out_channels": (3, 48, 40, 64, 176, 512), + "stage_idxs": (8, 13, 27, 
39), + "model_name": "efficientnet-b5", + }, + }, + "efficientnet-b6": { + "encoder": EfficientNetEncoder, + "pretrained_settings": _get_pretrained_settings("efficientnet-b6"), + "params": { + "out_channels": (3, 56, 40, 72, 200, 576), + "stage_idxs": (9, 15, 31, 45), + "model_name": "efficientnet-b6", + }, + }, + "efficientnet-b7": { + "encoder": EfficientNetEncoder, + "pretrained_settings": _get_pretrained_settings("efficientnet-b7"), + "params": { + "out_channels": (3, 64, 48, 80, 224, 640), + "stage_idxs": (11, 18, 38, 55), + "model_name": "efficientnet-b7", + }, + }, +} diff --git a/feature_extractor_models/encoders/inceptionresnetv2.py b/feature_extractor_models/encoders/inceptionresnetv2.py new file mode 100644 index 0000000000000000000000000000000000000000..5d90c7f475166ac17e2826dd4b093eceea0d9097 --- /dev/null +++ b/feature_extractor_models/encoders/inceptionresnetv2.py @@ -0,0 +1,91 @@ +"""Each encoder should have following attributes and methods and be inherited from `_base.EncoderMixin` + +Attributes: + + _out_channels (list of int): specify number of channels for each encoder feature tensor + _depth (int): specify number of stages in decoder (in other words number of downsampling operations) + _in_channels (int): default number of input channels in first Conv2d layer for encoder (usually 3) + +Methods: + + forward(self, x: torch.Tensor) + produce list of features of different spatial resolutions, each feature is a 4D torch.tensor of + shape NCHW (features should be sorted in descending order according to spatial resolution, starting + with resolution same as input `x` tensor). + + Input: `x` with shape (1, 3, 64, 64) + Output: [f0, f1, f2, f3, f4, f5] - features with corresponding shapes + [(1, 3, 64, 64), (1, 64, 32, 32), (1, 128, 16, 16), (1, 256, 8, 8), + (1, 512, 4, 4), (1, 1024, 2, 2)] (C - dim may differ) + + also should support number of features according to specified depth, e.g. if depth = 5, + number of feature tensors = 6 (one with same resolution as input and 5 downsampled), + depth = 3 -> number of feature tensors = 4 (one with same resolution as input and 3 downsampled). +""" + +import torch.nn as nn +from pretrainedmodels.models.inceptionresnetv2 import InceptionResNetV2 +from pretrainedmodels.models.inceptionresnetv2 import pretrained_settings + +from ._base import EncoderMixin + + +class InceptionResNetV2Encoder(InceptionResNetV2, EncoderMixin): + def __init__(self, out_channels, depth=5, **kwargs): + super().__init__(**kwargs) + + self._out_channels = out_channels + self._depth = depth + self._in_channels = 3 + + # correct paddings + for m in self.modules(): + if isinstance(m, nn.Conv2d): + if m.kernel_size == (3, 3): + m.padding = (1, 1) + if isinstance(m, nn.MaxPool2d): + m.padding = (1, 1) + + # remove linear layers + del self.avgpool_1a + del self.last_linear + + def make_dilated(self, *args, **kwargs): + raise ValueError( + "InceptionResnetV2 encoder does not support dilated mode " + "due to pooling operation for downsampling!" 
+ ) + + def get_stages(self): + return [ + nn.Identity(), + nn.Sequential(self.conv2d_1a, self.conv2d_2a, self.conv2d_2b), + nn.Sequential(self.maxpool_3a, self.conv2d_3b, self.conv2d_4a), + nn.Sequential(self.maxpool_5a, self.mixed_5b, self.repeat), + nn.Sequential(self.mixed_6a, self.repeat_1), + nn.Sequential(self.mixed_7a, self.repeat_2, self.block8, self.conv2d_7b), + ] + + def forward(self, x): + stages = self.get_stages() + + features = [] + for i in range(self._depth + 1): + x = stages[i](x) + features.append(x) + + return features + + def load_state_dict(self, state_dict, **kwargs): + state_dict.pop("last_linear.bias", None) + state_dict.pop("last_linear.weight", None) + super().load_state_dict(state_dict, **kwargs) + + +inceptionresnetv2_encoders = { + "inceptionresnetv2": { + "encoder": InceptionResNetV2Encoder, + "pretrained_settings": pretrained_settings["inceptionresnetv2"], + "params": {"out_channels": (3, 64, 192, 320, 1088, 1536), "num_classes": 1000}, + } +} diff --git a/feature_extractor_models/encoders/inceptionv4.py b/feature_extractor_models/encoders/inceptionv4.py new file mode 100644 index 0000000000000000000000000000000000000000..83adf00365379c322fbf0d0a177b78aea21ed65e --- /dev/null +++ b/feature_extractor_models/encoders/inceptionv4.py @@ -0,0 +1,94 @@ +"""Each encoder should have following attributes and methods and be inherited from `_base.EncoderMixin` + +Attributes: + + _out_channels (list of int): specify number of channels for each encoder feature tensor + _depth (int): specify number of stages in decoder (in other words number of downsampling operations) + _in_channels (int): default number of input channels in first Conv2d layer for encoder (usually 3) + +Methods: + + forward(self, x: torch.Tensor) + produce list of features of different spatial resolutions, each feature is a 4D torch.tensor of + shape NCHW (features should be sorted in descending order according to spatial resolution, starting + with resolution same as input `x` tensor). + + Input: `x` with shape (1, 3, 64, 64) + Output: [f0, f1, f2, f3, f4, f5] - features with corresponding shapes + [(1, 3, 64, 64), (1, 64, 32, 32), (1, 128, 16, 16), (1, 256, 8, 8), + (1, 512, 4, 4), (1, 1024, 2, 2)] (C - dim may differ) + + also should support number of features according to specified depth, e.g. if depth = 5, + number of feature tensors = 6 (one with same resolution as input and 5 downsampled), + depth = 3 -> number of feature tensors = 4 (one with same resolution as input and 3 downsampled). +""" + +import torch.nn as nn +from pretrainedmodels.models.inceptionv4 import InceptionV4 +from pretrainedmodels.models.inceptionv4 import pretrained_settings + +from ._base import EncoderMixin + + +class InceptionV4Encoder(InceptionV4, EncoderMixin): + def __init__(self, stage_idxs, out_channels, depth=5, **kwargs): + super().__init__(**kwargs) + self._stage_idxs = stage_idxs + self._out_channels = out_channels + self._depth = depth + self._in_channels = 3 + + # correct paddings + for m in self.modules(): + if isinstance(m, nn.Conv2d): + if m.kernel_size == (3, 3): + m.padding = (1, 1) + if isinstance(m, nn.MaxPool2d): + m.padding = (1, 1) + + # remove linear layers + del self.last_linear + + def make_dilated(self, stage_list, dilation_list): + raise ValueError( + "InceptionV4 encoder does not support dilated mode " + "due to pooling operation for downsampling!" 
+ ) + + def get_stages(self): + return [ + nn.Identity(), + self.features[: self._stage_idxs[0]], + self.features[self._stage_idxs[0] : self._stage_idxs[1]], + self.features[self._stage_idxs[1] : self._stage_idxs[2]], + self.features[self._stage_idxs[2] : self._stage_idxs[3]], + self.features[self._stage_idxs[3] :], + ] + + def forward(self, x): + stages = self.get_stages() + + features = [] + for i in range(self._depth + 1): + x = stages[i](x) + features.append(x) + + return features + + def load_state_dict(self, state_dict, **kwargs): + state_dict.pop("last_linear.bias", None) + state_dict.pop("last_linear.weight", None) + super().load_state_dict(state_dict, **kwargs) + + +inceptionv4_encoders = { + "inceptionv4": { + "encoder": InceptionV4Encoder, + "pretrained_settings": pretrained_settings["inceptionv4"], + "params": { + "stage_idxs": (3, 5, 9, 15), + "out_channels": (3, 64, 192, 384, 1024, 1536), + "num_classes": 1001, + }, + } +} diff --git a/feature_extractor_models/encoders/mix_transformer.py b/feature_extractor_models/encoders/mix_transformer.py new file mode 100644 index 0000000000000000000000000000000000000000..1c7091256b76fbfe00b4cfe731be7e573b3fe078 --- /dev/null +++ b/feature_extractor_models/encoders/mix_transformer.py @@ -0,0 +1,663 @@ +# --------------------------------------------------------------- +# Copyright (c) 2021, NVIDIA Corporation. All rights reserved. +# +# This work is licensed under the NVIDIA Source Code License +# --------------------------------------------------------------- +import math +import torch +import torch.nn as nn +from functools import partial + +from timm.layers import DropPath, to_2tuple, trunc_normal_ + + +class Mlp(nn.Module): + def __init__( + self, + in_features, + hidden_features=None, + out_features=None, + act_layer=nn.GELU, + drop=0.0, + ): + super().__init__() + out_features = out_features or in_features + hidden_features = hidden_features or in_features + self.fc1 = nn.Linear(in_features, hidden_features) + self.dwconv = DWConv(hidden_features) + self.act = act_layer() + self.fc2 = nn.Linear(hidden_features, out_features) + self.drop = nn.Dropout(drop) + + self.apply(self._init_weights) + + def _init_weights(self, m): + if isinstance(m, nn.Linear): + trunc_normal_(m.weight, std=0.02) + if isinstance(m, nn.Linear) and m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.LayerNorm): + nn.init.constant_(m.bias, 0) + nn.init.constant_(m.weight, 1.0) + elif isinstance(m, nn.Conv2d): + fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + fan_out //= m.groups + m.weight.data.normal_(0, math.sqrt(2.0 / fan_out)) + if m.bias is not None: + m.bias.data.zero_() + + def forward(self, x, H, W): + x = self.fc1(x) + x = self.dwconv(x, H, W) + x = self.act(x) + x = self.drop(x) + x = self.fc2(x) + x = self.drop(x) + return x + + +class Attention(nn.Module): + def __init__( + self, + dim, + num_heads=8, + qkv_bias=False, + qk_scale=None, + attn_drop=0.0, + proj_drop=0.0, + sr_ratio=1, + ): + super().__init__() + assert ( + dim % num_heads == 0 + ), f"dim {dim} should be divided by num_heads {num_heads}." 
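+        # Spatial-reduction attention (SegFormer): when sr_ratio > 1, the keys
+        # and values are computed from a spatially downsampled copy of the
+        # tokens (see `self.sr` / `self.norm` below), cutting attention cost
+        # from O(N^2) to roughly O(N^2 / sr_ratio^2).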
+ + self.dim = dim + self.num_heads = num_heads + head_dim = dim // num_heads + self.scale = qk_scale or head_dim**-0.5 + + self.q = nn.Linear(dim, dim, bias=qkv_bias) + self.kv = nn.Linear(dim, dim * 2, bias=qkv_bias) + self.attn_drop = nn.Dropout(attn_drop) + self.proj = nn.Linear(dim, dim) + self.proj_drop = nn.Dropout(proj_drop) + + self.sr_ratio = sr_ratio + if sr_ratio > 1: + self.sr = nn.Conv2d(dim, dim, kernel_size=sr_ratio, stride=sr_ratio) + self.norm = nn.LayerNorm(dim) + + self.apply(self._init_weights) + + def _init_weights(self, m): + if isinstance(m, nn.Linear): + trunc_normal_(m.weight, std=0.02) + if isinstance(m, nn.Linear) and m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.LayerNorm): + nn.init.constant_(m.bias, 0) + nn.init.constant_(m.weight, 1.0) + elif isinstance(m, nn.Conv2d): + fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + fan_out //= m.groups + m.weight.data.normal_(0, math.sqrt(2.0 / fan_out)) + if m.bias is not None: + m.bias.data.zero_() + + def forward(self, x, H, W): + B, N, C = x.shape + q = ( + self.q(x) + .reshape(B, N, self.num_heads, C // self.num_heads) + .permute(0, 2, 1, 3) + ) + + if self.sr_ratio > 1: + x_ = x.permute(0, 2, 1).reshape(B, C, H, W) + x_ = self.sr(x_).reshape(B, C, -1).permute(0, 2, 1) + x_ = self.norm(x_) + kv = ( + self.kv(x_) + .reshape(B, -1, 2, self.num_heads, C // self.num_heads) + .permute(2, 0, 3, 1, 4) + ) + else: + kv = ( + self.kv(x) + .reshape(B, -1, 2, self.num_heads, C // self.num_heads) + .permute(2, 0, 3, 1, 4) + ) + k, v = kv[0], kv[1] + + attn = (q @ k.transpose(-2, -1)) * self.scale + attn = attn.softmax(dim=-1) + attn = self.attn_drop(attn) + + x = (attn @ v).transpose(1, 2).reshape(B, N, C) + x = self.proj(x) + x = self.proj_drop(x) + + return x + + +class Block(nn.Module): + def __init__( + self, + dim, + num_heads, + mlp_ratio=4.0, + qkv_bias=False, + qk_scale=None, + drop=0.0, + attn_drop=0.0, + drop_path=0.0, + act_layer=nn.GELU, + norm_layer=nn.LayerNorm, + sr_ratio=1, + ): + super().__init__() + self.norm1 = norm_layer(dim) + self.attn = Attention( + dim, + num_heads=num_heads, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + attn_drop=attn_drop, + proj_drop=drop, + sr_ratio=sr_ratio, + ) + # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here + self.drop_path = DropPath(drop_path) if drop_path > 0.0 else nn.Identity() + self.norm2 = norm_layer(dim) + mlp_hidden_dim = int(dim * mlp_ratio) + self.mlp = Mlp( + in_features=dim, + hidden_features=mlp_hidden_dim, + act_layer=act_layer, + drop=drop, + ) + + self.apply(self._init_weights) + + def _init_weights(self, m): + if isinstance(m, nn.Linear): + trunc_normal_(m.weight, std=0.02) + if isinstance(m, nn.Linear) and m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.LayerNorm): + nn.init.constant_(m.bias, 0) + nn.init.constant_(m.weight, 1.0) + elif isinstance(m, nn.Conv2d): + fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + fan_out //= m.groups + m.weight.data.normal_(0, math.sqrt(2.0 / fan_out)) + if m.bias is not None: + m.bias.data.zero_() + + def forward(self, x, H, W): + x = x + self.drop_path(self.attn(self.norm1(x), H, W)) + x = x + self.drop_path(self.mlp(self.norm2(x), H, W)) + + return x + + +class OverlapPatchEmbed(nn.Module): + """Image to Patch Embedding""" + + def __init__(self, img_size=224, patch_size=7, stride=4, in_chans=3, embed_dim=768): + super().__init__() + img_size = to_2tuple(img_size) + patch_size = to_2tuple(patch_size) 
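+        # "Overlapping" refers to stride < patch_size (e.g. 7x7 patches with
+        # stride 4): neighbouring patches share pixels, unlike the
+        # non-overlapping patchify of the original ViT (stride == patch_size).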
+ + self.img_size = img_size + self.patch_size = patch_size + self.H, self.W = img_size[0] // patch_size[0], img_size[1] // patch_size[1] + self.num_patches = self.H * self.W + self.proj = nn.Conv2d( + in_chans, + embed_dim, + kernel_size=patch_size, + stride=stride, + padding=(patch_size[0] // 2, patch_size[1] // 2), + ) + self.norm = nn.LayerNorm(embed_dim) + + self.apply(self._init_weights) + + def _init_weights(self, m): + if isinstance(m, nn.Linear): + trunc_normal_(m.weight, std=0.02) + if isinstance(m, nn.Linear) and m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.LayerNorm): + nn.init.constant_(m.bias, 0) + nn.init.constant_(m.weight, 1.0) + elif isinstance(m, nn.Conv2d): + fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + fan_out //= m.groups + m.weight.data.normal_(0, math.sqrt(2.0 / fan_out)) + if m.bias is not None: + m.bias.data.zero_() + + def forward(self, x): + x = self.proj(x) + _, _, H, W = x.shape + x = x.flatten(2).transpose(1, 2) + x = self.norm(x) + + return x, H, W + + +class MixVisionTransformer(nn.Module): + def __init__( + self, + img_size=224, + patch_size=16, + in_chans=3, + num_classes=1000, + embed_dims=[64, 128, 256, 512], + num_heads=[1, 2, 4, 8], + mlp_ratios=[4, 4, 4, 4], + qkv_bias=False, + qk_scale=None, + drop_rate=0.0, + attn_drop_rate=0.0, + drop_path_rate=0.0, + norm_layer=nn.LayerNorm, + depths=[3, 4, 6, 3], + sr_ratios=[8, 4, 2, 1], + ): + super().__init__() + self.num_classes = num_classes + self.depths = depths + + # patch_embed + self.patch_embed1 = OverlapPatchEmbed( + img_size=img_size, + patch_size=7, + stride=4, + in_chans=in_chans, + embed_dim=embed_dims[0], + ) + self.patch_embed2 = OverlapPatchEmbed( + img_size=img_size // 4, + patch_size=3, + stride=2, + in_chans=embed_dims[0], + embed_dim=embed_dims[1], + ) + self.patch_embed3 = OverlapPatchEmbed( + img_size=img_size // 8, + patch_size=3, + stride=2, + in_chans=embed_dims[1], + embed_dim=embed_dims[2], + ) + self.patch_embed4 = OverlapPatchEmbed( + img_size=img_size // 16, + patch_size=3, + stride=2, + in_chans=embed_dims[2], + embed_dim=embed_dims[3], + ) + + # transformer encoder + dpr = [ + x.item() for x in torch.linspace(0, drop_path_rate, sum(depths)) + ] # stochastic depth decay rule + cur = 0 + self.block1 = nn.ModuleList( + [ + Block( + dim=embed_dims[0], + num_heads=num_heads[0], + mlp_ratio=mlp_ratios[0], + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop_rate, + attn_drop=attn_drop_rate, + drop_path=dpr[cur + i], + norm_layer=norm_layer, + sr_ratio=sr_ratios[0], + ) + for i in range(depths[0]) + ] + ) + self.norm1 = norm_layer(embed_dims[0]) + + cur += depths[0] + self.block2 = nn.ModuleList( + [ + Block( + dim=embed_dims[1], + num_heads=num_heads[1], + mlp_ratio=mlp_ratios[1], + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop_rate, + attn_drop=attn_drop_rate, + drop_path=dpr[cur + i], + norm_layer=norm_layer, + sr_ratio=sr_ratios[1], + ) + for i in range(depths[1]) + ] + ) + self.norm2 = norm_layer(embed_dims[1]) + + cur += depths[1] + self.block3 = nn.ModuleList( + [ + Block( + dim=embed_dims[2], + num_heads=num_heads[2], + mlp_ratio=mlp_ratios[2], + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop_rate, + attn_drop=attn_drop_rate, + drop_path=dpr[cur + i], + norm_layer=norm_layer, + sr_ratio=sr_ratios[2], + ) + for i in range(depths[2]) + ] + ) + self.norm3 = norm_layer(embed_dims[2]) + + cur += depths[2] + self.block4 = nn.ModuleList( + [ + Block( + dim=embed_dims[3], + num_heads=num_heads[3], + mlp_ratio=mlp_ratios[3], 
+ qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop_rate, + attn_drop=attn_drop_rate, + drop_path=dpr[cur + i], + norm_layer=norm_layer, + sr_ratio=sr_ratios[3], + ) + for i in range(depths[3]) + ] + ) + self.norm4 = norm_layer(embed_dims[3]) + + # classification head + # self.head = nn.Linear(embed_dims[3], num_classes) if num_classes > 0 else nn.Identity() + + self.apply(self._init_weights) + + def _init_weights(self, m): + if isinstance(m, nn.Linear): + trunc_normal_(m.weight, std=0.02) + if isinstance(m, nn.Linear) and m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.LayerNorm): + nn.init.constant_(m.bias, 0) + nn.init.constant_(m.weight, 1.0) + elif isinstance(m, nn.Conv2d): + fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + fan_out //= m.groups + m.weight.data.normal_(0, math.sqrt(2.0 / fan_out)) + if m.bias is not None: + m.bias.data.zero_() + + def init_weights(self, pretrained=None): + pass + + def reset_drop_path(self, drop_path_rate): + dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(self.depths))] + cur = 0 + for i in range(self.depths[0]): + self.block1[i].drop_path.drop_prob = dpr[cur + i] + + cur += self.depths[0] + for i in range(self.depths[1]): + self.block2[i].drop_path.drop_prob = dpr[cur + i] + + cur += self.depths[1] + for i in range(self.depths[2]): + self.block3[i].drop_path.drop_prob = dpr[cur + i] + + cur += self.depths[2] + for i in range(self.depths[3]): + self.block4[i].drop_path.drop_prob = dpr[cur + i] + + def freeze_patch_emb(self): + self.patch_embed1.requires_grad = False + + @torch.jit.ignore + def no_weight_decay(self): + return { + "pos_embed1", + "pos_embed2", + "pos_embed3", + "pos_embed4", + "cls_token", + } # has pos_embed may be better + + def get_classifier(self): + return self.head + + def reset_classifier(self, num_classes, global_pool=""): + self.num_classes = num_classes + self.head = ( + nn.Linear(self.embed_dim, num_classes) if num_classes > 0 else nn.Identity() + ) + + def forward_features(self, x): + B = x.shape[0] + outs = [] + + # stage 1 + x, H, W = self.patch_embed1(x) + for i, blk in enumerate(self.block1): + x = blk(x, H, W) + x = self.norm1(x) + x = x.reshape(B, H, W, -1).permute(0, 3, 1, 2).contiguous() + outs.append(x) + + # stage 2 + x, H, W = self.patch_embed2(x) + for i, blk in enumerate(self.block2): + x = blk(x, H, W) + x = self.norm2(x) + x = x.reshape(B, H, W, -1).permute(0, 3, 1, 2).contiguous() + outs.append(x) + + # stage 3 + x, H, W = self.patch_embed3(x) + for i, blk in enumerate(self.block3): + x = blk(x, H, W) + x = self.norm3(x) + x = x.reshape(B, H, W, -1).permute(0, 3, 1, 2).contiguous() + outs.append(x) + + # stage 4 + x, H, W = self.patch_embed4(x) + for i, blk in enumerate(self.block4): + x = blk(x, H, W) + x = self.norm4(x) + x = x.reshape(B, H, W, -1).permute(0, 3, 1, 2).contiguous() + outs.append(x) + + return outs + + def forward(self, x): + x = self.forward_features(x) + # x = self.head(x) + + return x + + +class DWConv(nn.Module): + def __init__(self, dim=768): + super(DWConv, self).__init__() + self.dwconv = nn.Conv2d(dim, dim, 3, 1, 1, bias=True, groups=dim) + + def forward(self, x, H, W): + B, N, C = x.shape + x = x.transpose(1, 2).view(B, C, H, W) + x = self.dwconv(x) + x = x.flatten(2).transpose(1, 2) + + return x + + +# --------------------------------------------------------------- +# End of NVIDIA code +# --------------------------------------------------------------- + +from ._base import EncoderMixin # noqa E402 + + +class 
MixVisionTransformerEncoder(MixVisionTransformer, EncoderMixin): + def __init__(self, out_channels, depth=5, **kwargs): + super().__init__(**kwargs) + self._out_channels = out_channels + self._depth = depth + self._in_channels = 3 + + def make_dilated(self, *args, **kwargs): + raise ValueError("MixVisionTransformer encoder does not support dilated mode") + + def set_in_channels(self, in_channels, *args, **kwargs): + if in_channels != 3: + raise ValueError( + "MixVisionTransformer encoder does not support in_channels setting other than 3" + ) + + def forward(self, x): + # create dummy output for the first block + B, C, H, W = x.shape + dummy = torch.empty([B, 0, H // 2, W // 2], dtype=x.dtype, device=x.device) + + return [x, dummy] + self.forward_features(x)[: self._depth - 1] + + def load_state_dict(self, state_dict): + state_dict.pop("head.weight", None) + state_dict.pop("head.bias", None) + return super().load_state_dict(state_dict) + + +def get_pretrained_cfg(name): + return { + "url": "https://github.com/qubvel/segmentation_models.pytorch/releases/download/v0.0.2/{}.pth".format( + name + ), + "input_space": "RGB", + "input_size": [3, 224, 224], + "input_range": [0, 1], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + } + + +mix_transformer_encoders = { + "mit_b0": { + "encoder": MixVisionTransformerEncoder, + "pretrained_settings": {"imagenet": get_pretrained_cfg("mit_b0")}, + "params": dict( + out_channels=(3, 0, 32, 64, 160, 256), + patch_size=4, + embed_dims=[32, 64, 160, 256], + num_heads=[1, 2, 5, 8], + mlp_ratios=[4, 4, 4, 4], + qkv_bias=True, + norm_layer=partial(nn.LayerNorm, eps=1e-6), + depths=[2, 2, 2, 2], + sr_ratios=[8, 4, 2, 1], + drop_rate=0.0, + drop_path_rate=0.1, + ), + }, + "mit_b1": { + "encoder": MixVisionTransformerEncoder, + "pretrained_settings": {"imagenet": get_pretrained_cfg("mit_b1")}, + "params": dict( + out_channels=(3, 0, 64, 128, 320, 512), + patch_size=4, + embed_dims=[64, 128, 320, 512], + num_heads=[1, 2, 5, 8], + mlp_ratios=[4, 4, 4, 4], + qkv_bias=True, + norm_layer=partial(nn.LayerNorm, eps=1e-6), + depths=[2, 2, 2, 2], + sr_ratios=[8, 4, 2, 1], + drop_rate=0.0, + drop_path_rate=0.1, + ), + }, + "mit_b2": { + "encoder": MixVisionTransformerEncoder, + "pretrained_settings": {"imagenet": get_pretrained_cfg("mit_b2")}, + "params": dict( + out_channels=(3, 0, 64, 128, 320, 512), + patch_size=4, + embed_dims=[64, 128, 320, 512], + num_heads=[1, 2, 5, 8], + mlp_ratios=[4, 4, 4, 4], + qkv_bias=True, + norm_layer=partial(nn.LayerNorm, eps=1e-6), + depths=[3, 4, 6, 3], + sr_ratios=[8, 4, 2, 1], + drop_rate=0.0, + drop_path_rate=0.1, + ), + }, + "mit_b3": { + "encoder": MixVisionTransformerEncoder, + "pretrained_settings": {"imagenet": get_pretrained_cfg("mit_b3")}, + "params": dict( + out_channels=(3, 0, 64, 128, 320, 512), + patch_size=4, + embed_dims=[64, 128, 320, 512], + num_heads=[1, 2, 5, 8], + mlp_ratios=[4, 4, 4, 4], + qkv_bias=True, + norm_layer=partial(nn.LayerNorm, eps=1e-6), + depths=[3, 4, 18, 3], + sr_ratios=[8, 4, 2, 1], + drop_rate=0.0, + drop_path_rate=0.1, + ), + }, + "mit_b4": { + "encoder": MixVisionTransformerEncoder, + "pretrained_settings": {"imagenet": get_pretrained_cfg("mit_b4")}, + "params": dict( + out_channels=(3, 0, 64, 128, 320, 512), + patch_size=4, + embed_dims=[64, 128, 320, 512], + num_heads=[1, 2, 5, 8], + mlp_ratios=[4, 4, 4, 4], + qkv_bias=True, + norm_layer=partial(nn.LayerNorm, eps=1e-6), + depths=[3, 8, 27, 3], + sr_ratios=[8, 4, 2, 1], + drop_rate=0.0, + drop_path_rate=0.1, + ), + }, + "mit_b5": { 
+ "encoder": MixVisionTransformerEncoder, + "pretrained_settings": {"imagenet": get_pretrained_cfg("mit_b5")}, + "params": dict( + out_channels=(3, 0, 64, 128, 320, 512), + patch_size=4, + embed_dims=[64, 128, 320, 512], + num_heads=[1, 2, 5, 8], + mlp_ratios=[4, 4, 4, 4], + qkv_bias=True, + norm_layer=partial(nn.LayerNorm, eps=1e-6), + depths=[3, 6, 40, 3], + sr_ratios=[8, 4, 2, 1], + drop_rate=0.0, + drop_path_rate=0.1, + ), + }, +} diff --git a/feature_extractor_models/encoders/mobilenet.py b/feature_extractor_models/encoders/mobilenet.py new file mode 100644 index 0000000000000000000000000000000000000000..dd30f142a822f8df1e5aeb2671a99e7509bc763e --- /dev/null +++ b/feature_extractor_models/encoders/mobilenet.py @@ -0,0 +1,80 @@ +"""Each encoder should have following attributes and methods and be inherited from `_base.EncoderMixin` + +Attributes: + + _out_channels (list of int): specify number of channels for each encoder feature tensor + _depth (int): specify number of stages in decoder (in other words number of downsampling operations) + _in_channels (int): default number of input channels in first Conv2d layer for encoder (usually 3) + +Methods: + + forward(self, x: torch.Tensor) + produce list of features of different spatial resolutions, each feature is a 4D torch.tensor of + shape NCHW (features should be sorted in descending order according to spatial resolution, starting + with resolution same as input `x` tensor). + + Input: `x` with shape (1, 3, 64, 64) + Output: [f0, f1, f2, f3, f4, f5] - features with corresponding shapes + [(1, 3, 64, 64), (1, 64, 32, 32), (1, 128, 16, 16), (1, 256, 8, 8), + (1, 512, 4, 4), (1, 1024, 2, 2)] (C - dim may differ) + + also should support number of features according to specified depth, e.g. if depth = 5, + number of feature tensors = 6 (one with same resolution as input and 5 downsampled), + depth = 3 -> number of feature tensors = 4 (one with same resolution as input and 3 downsampled). 
+""" + +import torchvision +import torch.nn as nn + +from ._base import EncoderMixin + + +class MobileNetV2Encoder(torchvision.models.MobileNetV2, EncoderMixin): + def __init__(self, out_channels, depth=5, **kwargs): + super().__init__(**kwargs) + self._depth = depth + self._out_channels = out_channels + self._in_channels = 3 + del self.classifier + + def get_stages(self): + return [ + nn.Identity(), + self.features[:2], + self.features[2:4], + self.features[4:7], + self.features[7:14], + self.features[14:], + ] + + def forward(self, x): + stages = self.get_stages() + + features = [] + for i in range(self._depth + 1): + x = stages[i](x) + features.append(x) + + return features + + def load_state_dict(self, state_dict, **kwargs): + state_dict.pop("classifier.1.bias", None) + state_dict.pop("classifier.1.weight", None) + super().load_state_dict(state_dict, **kwargs) + + +mobilenet_encoders = { + "mobilenet_v2": { + "encoder": MobileNetV2Encoder, + "pretrained_settings": { + "imagenet": { + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "url": "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth", + "input_space": "RGB", + "input_range": [0, 1], + } + }, + "params": {"out_channels": (3, 16, 24, 32, 96, 1280)}, + } +} diff --git a/feature_extractor_models/encoders/mobileone.py b/feature_extractor_models/encoders/mobileone.py new file mode 100644 index 0000000000000000000000000000000000000000..76f500536ca85449c9be7153c41a0c24bbf06f83 --- /dev/null +++ b/feature_extractor_models/encoders/mobileone.py @@ -0,0 +1,559 @@ +# +# For licensing see accompanying LICENSE file. +# Copyright (C) 2022 Apple Inc. All Rights Reserved. +# +import copy +from typing import List, Optional, Tuple + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from . import _utils as utils +from ._base import EncoderMixin + +__all__ = ["MobileOne", "reparameterize_model"] + + +class SEBlock(nn.Module): + """Squeeze and Excite module. + + Pytorch implementation of `Squeeze-and-Excitation Networks` - + https://arxiv.org/pdf/1709.01507.pdf + """ + + def __init__(self, in_channels: int, rd_ratio: float = 0.0625) -> None: + """Construct a Squeeze and Excite Module. + + :param in_channels: Number of input channels. + :param rd_ratio: Input channel reduction ratio. + """ + super(SEBlock, self).__init__() + self.reduce = nn.Conv2d( + in_channels=in_channels, + out_channels=int(in_channels * rd_ratio), + kernel_size=1, + stride=1, + bias=True, + ) + self.expand = nn.Conv2d( + in_channels=int(in_channels * rd_ratio), + out_channels=in_channels, + kernel_size=1, + stride=1, + bias=True, + ) + + def forward(self, inputs: torch.Tensor) -> torch.Tensor: + """Apply forward pass.""" + b, c, h, w = inputs.size() + x = F.avg_pool2d(inputs, kernel_size=[h, w]) + x = self.reduce(x) + x = F.relu(x) + x = self.expand(x) + x = torch.sigmoid(x) + x = x.view(-1, c, 1, 1) + return inputs * x + + +class MobileOneBlock(nn.Module): + """MobileOne building block. 
+ + This block has a multi-branched architecture at train-time + and plain-CNN style architecture at inference time + For more details, please refer to our paper: + `An Improved One millisecond Mobile Backbone` - + https://arxiv.org/pdf/2206.04040.pdf + """ + + def __init__( + self, + in_channels: int, + out_channels: int, + kernel_size: int, + stride: int = 1, + padding: int = 0, + dilation: int = 1, + groups: int = 1, + inference_mode: bool = False, + use_se: bool = False, + num_conv_branches: int = 1, + ) -> None: + """Construct a MobileOneBlock module. + + :param in_channels: Number of channels in the input. + :param out_channels: Number of channels produced by the block. + :param kernel_size: Size of the convolution kernel. + :param stride: Stride size. + :param padding: Zero-padding size. + :param dilation: Kernel dilation factor. + :param groups: Group number. + :param inference_mode: If True, instantiates model in inference mode. + :param use_se: Whether to use SE-ReLU activations. + :param num_conv_branches: Number of linear conv branches. + """ + super(MobileOneBlock, self).__init__() + self.inference_mode = inference_mode + self.groups = groups + self.stride = stride + self.kernel_size = kernel_size + self.in_channels = in_channels + self.out_channels = out_channels + self.num_conv_branches = num_conv_branches + + # Check if SE-ReLU is requested + if use_se: + self.se = SEBlock(out_channels) + else: + self.se = nn.Identity() + self.activation = nn.ReLU() + + if inference_mode: + self.reparam_conv = nn.Conv2d( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + groups=groups, + bias=True, + ) + else: + # Re-parameterizable skip connection + self.rbr_skip = ( + nn.BatchNorm2d(num_features=in_channels) + if out_channels == in_channels and stride == 1 + else None + ) + + # Re-parameterizable conv branches + rbr_conv = list() + for _ in range(self.num_conv_branches): + rbr_conv.append(self._conv_bn(kernel_size=kernel_size, padding=padding)) + self.rbr_conv = nn.ModuleList(rbr_conv) + + # Re-parameterizable scale branch + self.rbr_scale = None + if kernel_size > 1: + self.rbr_scale = self._conv_bn(kernel_size=1, padding=0) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + """Apply forward pass.""" + # Inference mode forward pass. + if self.inference_mode: + return self.activation(self.se(self.reparam_conv(x))) + + # Multi-branched train-time forward pass. + # Skip branch output + identity_out = 0 + if self.rbr_skip is not None: + identity_out = self.rbr_skip(x) + + # Scale branch output + scale_out = 0 + if self.rbr_scale is not None: + scale_out = self.rbr_scale(x) + + # Other branches + out = scale_out + identity_out + for ix in range(self.num_conv_branches): + out += self.rbr_conv[ix](x) + + return self.activation(self.se(out)) + + def reparameterize(self): + """Following works like `RepVGG: Making VGG-style ConvNets Great Again` - + https://arxiv.org/pdf/2101.03697.pdf. We re-parameterize multi-branched + architecture used at training time to obtain a plain CNN-like structure + for inference. 
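+
+        Concretely, `_fuse_bn_tensor` folds each conv+BN branch into a single
+        conv with W' = W * gamma / sqrt(var + eps) and
+        b' = beta - mean * gamma / sqrt(var + eps); the folded kernels and
+        biases of all branches are then summed into `reparam_conv`.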
+ """ + if self.inference_mode: + return + kernel, bias = self._get_kernel_bias() + self.reparam_conv = nn.Conv2d( + in_channels=self.rbr_conv[0].conv.in_channels, + out_channels=self.rbr_conv[0].conv.out_channels, + kernel_size=self.rbr_conv[0].conv.kernel_size, + stride=self.rbr_conv[0].conv.stride, + padding=self.rbr_conv[0].conv.padding, + dilation=self.rbr_conv[0].conv.dilation, + groups=self.rbr_conv[0].conv.groups, + bias=True, + ) + self.reparam_conv.weight.data = kernel + self.reparam_conv.bias.data = bias + + # Delete un-used branches + for para in self.parameters(): + para.detach_() + self.__delattr__("rbr_conv") + self.__delattr__("rbr_scale") + if hasattr(self, "rbr_skip"): + self.__delattr__("rbr_skip") + + self.inference_mode = True + + def _get_kernel_bias(self) -> Tuple[torch.Tensor, torch.Tensor]: + """Obtain the re-parameterized kernel and bias. + Reference: https://github.com/DingXiaoH/RepVGG/blob/main/repvgg.py#L83 + + :return: Tuple of (kernel, bias) after fusing branches. + """ + # get weights and bias of scale branch + kernel_scale = 0 + bias_scale = 0 + if self.rbr_scale is not None: + kernel_scale, bias_scale = self._fuse_bn_tensor(self.rbr_scale) + # Pad scale branch kernel to match conv branch kernel size. + pad = self.kernel_size // 2 + kernel_scale = torch.nn.functional.pad(kernel_scale, [pad, pad, pad, pad]) + + # get weights and bias of skip branch + kernel_identity = 0 + bias_identity = 0 + if self.rbr_skip is not None: + kernel_identity, bias_identity = self._fuse_bn_tensor(self.rbr_skip) + + # get weights and bias of conv branches + kernel_conv = 0 + bias_conv = 0 + for ix in range(self.num_conv_branches): + _kernel, _bias = self._fuse_bn_tensor(self.rbr_conv[ix]) + kernel_conv += _kernel + bias_conv += _bias + + kernel_final = kernel_conv + kernel_scale + kernel_identity + bias_final = bias_conv + bias_scale + bias_identity + return kernel_final, bias_final + + def _fuse_bn_tensor(self, branch) -> Tuple[torch.Tensor, torch.Tensor]: + """Fuse batchnorm layer with preceeding conv layer. + Reference: https://github.com/DingXiaoH/RepVGG/blob/main/repvgg.py#L95 + + :param branch: + :return: Tuple of (kernel, bias) after fusing batchnorm. + """ + if isinstance(branch, nn.Sequential): + kernel = branch.conv.weight + running_mean = branch.bn.running_mean + running_var = branch.bn.running_var + gamma = branch.bn.weight + beta = branch.bn.bias + eps = branch.bn.eps + else: + assert isinstance(branch, nn.BatchNorm2d) + if not hasattr(self, "id_tensor"): + input_dim = self.in_channels // self.groups + kernel_value = torch.zeros( + (self.in_channels, input_dim, self.kernel_size, self.kernel_size), + dtype=branch.weight.dtype, + device=branch.weight.device, + ) + for i in range(self.in_channels): + kernel_value[ + i, i % input_dim, self.kernel_size // 2, self.kernel_size // 2 + ] = 1 + self.id_tensor = kernel_value + kernel = self.id_tensor + running_mean = branch.running_mean + running_var = branch.running_var + gamma = branch.weight + beta = branch.bias + eps = branch.eps + std = (running_var + eps).sqrt() + t = (gamma / std).reshape(-1, 1, 1, 1) + return kernel * t, beta - running_mean * gamma / std + + def _conv_bn(self, kernel_size: int, padding: int) -> nn.Sequential: + """Construct conv-batchnorm layers. + + :param kernel_size: Size of the convolution kernel. + :param padding: Zero-padding size. + :return: Conv-BN module. 
+ """ + mod_list = nn.Sequential() + mod_list.add_module( + "conv", + nn.Conv2d( + in_channels=self.in_channels, + out_channels=self.out_channels, + kernel_size=kernel_size, + stride=self.stride, + padding=padding, + groups=self.groups, + bias=False, + ), + ) + mod_list.add_module("bn", nn.BatchNorm2d(num_features=self.out_channels)) + return mod_list + + +class MobileOne(nn.Module, EncoderMixin): + """MobileOne Model + + Pytorch implementation of `An Improved One millisecond Mobile Backbone` - + https://arxiv.org/pdf/2206.04040.pdf + """ + + def __init__( + self, + out_channels, + num_blocks_per_stage: List[int] = [2, 8, 10, 1], + width_multipliers: Optional[List[float]] = None, + inference_mode: bool = False, + use_se: bool = False, + depth=5, + in_channels=3, + num_conv_branches: int = 1, + ) -> None: + """Construct MobileOne model. + + :param num_blocks_per_stage: List of number of blocks per stage. + :param num_classes: Number of classes in the dataset. + :param width_multipliers: List of width multiplier for blocks in a stage. + :param inference_mode: If True, instantiates model in inference mode. + :param use_se: Whether to use SE-ReLU activations. + :param num_conv_branches: Number of linear conv branches. + """ + super().__init__() + + assert len(width_multipliers) == 4 + self.inference_mode = inference_mode + self._out_channels = out_channels + self.in_planes = min(64, int(64 * width_multipliers[0])) + self.use_se = use_se + self.num_conv_branches = num_conv_branches + self._depth = depth + self._in_channels = in_channels + self.set_in_channels(self._in_channels) + + # Build stages + self.stage0 = MobileOneBlock( + in_channels=self._in_channels, + out_channels=self.in_planes, + kernel_size=3, + stride=2, + padding=1, + inference_mode=self.inference_mode, + ) + self.cur_layer_idx = 1 + self.stage1 = self._make_stage( + int(64 * width_multipliers[0]), num_blocks_per_stage[0], num_se_blocks=0 + ) + self.stage2 = self._make_stage( + int(128 * width_multipliers[1]), num_blocks_per_stage[1], num_se_blocks=0 + ) + self.stage3 = self._make_stage( + int(256 * width_multipliers[2]), + num_blocks_per_stage[2], + num_se_blocks=int(num_blocks_per_stage[2] // 2) if use_se else 0, + ) + self.stage4 = self._make_stage( + int(512 * width_multipliers[3]), + num_blocks_per_stage[3], + num_se_blocks=num_blocks_per_stage[3] if use_se else 0, + ) + + def get_stages(self): + return [ + nn.Identity(), + self.stage0, + self.stage1, + self.stage2, + self.stage3, + self.stage4, + ] + + def _make_stage( + self, planes: int, num_blocks: int, num_se_blocks: int + ) -> nn.Sequential: + """Build a stage of MobileOne model. + + :param planes: Number of output channels. + :param num_blocks: Number of blocks in this stage. + :param num_se_blocks: Number of SE blocks in this stage. + :return: A stage of MobileOne model. + """ + # Get strides for all layers + strides = [2] + [1] * (num_blocks - 1) + blocks = [] + for ix, stride in enumerate(strides): + use_se = False + if num_se_blocks > num_blocks: + raise ValueError( + "Number of SE blocks cannot " "exceed number of layers." 
+ ) + if ix >= (num_blocks - num_se_blocks): + use_se = True + + # Depthwise conv + blocks.append( + MobileOneBlock( + in_channels=self.in_planes, + out_channels=self.in_planes, + kernel_size=3, + stride=stride, + padding=1, + groups=self.in_planes, + inference_mode=self.inference_mode, + use_se=use_se, + num_conv_branches=self.num_conv_branches, + ) + ) + # Pointwise conv + blocks.append( + MobileOneBlock( + in_channels=self.in_planes, + out_channels=planes, + kernel_size=1, + stride=1, + padding=0, + groups=1, + inference_mode=self.inference_mode, + use_se=use_se, + num_conv_branches=self.num_conv_branches, + ) + ) + self.in_planes = planes + self.cur_layer_idx += 1 + return nn.Sequential(*blocks) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + """Apply forward pass.""" + stages = self.get_stages() + features = [] + for i in range(self._depth + 1): + x = stages[i](x) + features.append(x) + return features + + def load_state_dict(self, state_dict, **kwargs): + state_dict.pop("linear.weight", None) + state_dict.pop("linear.bias", None) + super().load_state_dict(state_dict, **kwargs) + + def set_in_channels(self, in_channels, pretrained=True): + """Change first convolution channels""" + if in_channels == 3: + return + + self._in_channels = in_channels + self._out_channels = tuple([in_channels] + list(self._out_channels)[1:]) + utils.patch_first_conv( + model=self.stage0.rbr_conv, + new_in_channels=in_channels, + pretrained=pretrained, + ) + utils.patch_first_conv( + model=self.stage0.rbr_scale, + new_in_channels=in_channels, + pretrained=pretrained, + ) + + +def reparameterize_model(model: torch.nn.Module) -> nn.Module: + """Return a model where a multi-branched structure + used in training is re-parameterized into a single branch + for inference. + + :param model: MobileOne model in train mode. + :return: MobileOne model in inference mode. 
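+
+    Example (illustrative sketch; constructor arguments mirror the
+    "mobileone_s0" entry defined below):
+
+        model = MobileOne(
+            out_channels=(3, 48, 48, 128, 256, 1024),
+            width_multipliers=(0.75, 1.0, 1.0, 2.0),
+            num_conv_branches=4,
+        )
+        model.eval()
+        deploy_model = reparameterize_model(model)  # single-branch copy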
+ """ + # Avoid editing original graph + model = copy.deepcopy(model) + for module in model.modules(): + if hasattr(module, "reparameterize"): + module.reparameterize() + return model + + +mobileone_encoders = { + "mobileone_s0": { + "encoder": MobileOne, + "pretrained_settings": { + "imagenet": { + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "url": "https://docs-assets.developer.apple.com/ml-research/datasets/mobileone/mobileone_s0_unfused.pth.tar", # noqa + "input_space": "RGB", + "input_range": [0, 1], + } + }, + "params": { + "out_channels": (3, 48, 48, 128, 256, 1024), + "width_multipliers": (0.75, 1.0, 1.0, 2.0), + "num_conv_branches": 4, + "inference_mode": False, + }, + }, + "mobileone_s1": { + "encoder": MobileOne, + "pretrained_settings": { + "imagenet": { + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "url": "https://docs-assets.developer.apple.com/ml-research/datasets/mobileone/mobileone_s1_unfused.pth.tar", # noqa + "input_space": "RGB", + "input_range": [0, 1], + } + }, + "params": { + "out_channels": (3, 64, 96, 192, 512, 1280), + "width_multipliers": (1.5, 1.5, 2.0, 2.5), + "inference_mode": False, + }, + }, + "mobileone_s2": { + "encoder": MobileOne, + "pretrained_settings": { + "imagenet": { + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "url": "https://docs-assets.developer.apple.com/ml-research/datasets/mobileone/mobileone_s2_unfused.pth.tar", # noqa + "input_space": "RGB", + "input_range": [0, 1], + } + }, + "params": { + "out_channels": (3, 64, 96, 256, 640, 2048), + "width_multipliers": (1.5, 2.0, 2.5, 4.0), + "inference_mode": False, + }, + }, + "mobileone_s3": { + "encoder": MobileOne, + "pretrained_settings": { + "imagenet": { + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "url": "https://docs-assets.developer.apple.com/ml-research/datasets/mobileone/mobileone_s3_unfused.pth.tar", # noqa + "input_space": "RGB", + "input_range": [0, 1], + } + }, + "params": { + "out_channels": (3, 64, 128, 320, 768, 2048), + "width_multipliers": (2.0, 2.5, 3.0, 4.0), + "inference_mode": False, + }, + }, + "mobileone_s4": { + "encoder": MobileOne, + "pretrained_settings": { + "imagenet": { + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "url": "https://docs-assets.developer.apple.com/ml-research/datasets/mobileone/mobileone_s4_unfused.pth.tar", # noqa + "input_space": "RGB", + "input_range": [0, 1], + } + }, + "params": { + "out_channels": (3, 64, 192, 448, 896, 2048), + "width_multipliers": (3.0, 3.5, 3.5, 4.0), + "use_se": True, + "inference_mode": False, + }, + }, +} diff --git a/feature_extractor_models/encoders/resnet.py b/feature_extractor_models/encoders/resnet.py new file mode 100644 index 0000000000000000000000000000000000000000..2040a42c3e9e7bd9f53a1cafadfb53ef21523338 --- /dev/null +++ b/feature_extractor_models/encoders/resnet.py @@ -0,0 +1,239 @@ +"""Each encoder should have following attributes and methods and be inherited from `_base.EncoderMixin` + +Attributes: + + _out_channels (list of int): specify number of channels for each encoder feature tensor + _depth (int): specify number of stages in decoder (in other words number of downsampling operations) + _in_channels (int): default number of input channels in first Conv2d layer for encoder (usually 3) + +Methods: + + forward(self, x: torch.Tensor) + produce list of features of different spatial resolutions, each feature is a 4D torch.tensor of + shape NCHW (features should be sorted in descending order according to 
spatial resolution, starting + with resolution same as input `x` tensor). + + Input: `x` with shape (1, 3, 64, 64) + Output: [f0, f1, f2, f3, f4, f5] - features with corresponding shapes + [(1, 3, 64, 64), (1, 64, 32, 32), (1, 128, 16, 16), (1, 256, 8, 8), + (1, 512, 4, 4), (1, 1024, 2, 2)] (C - dim may differ) + + also should support number of features according to specified depth, e.g. if depth = 5, + number of feature tensors = 6 (one with same resolution as input and 5 downsampled), + depth = 3 -> number of feature tensors = 4 (one with same resolution as input and 3 downsampled). +""" + +from copy import deepcopy + +import torch.nn as nn + +from torchvision.models.resnet import ResNet +from torchvision.models.resnet import BasicBlock +from torchvision.models.resnet import Bottleneck +from pretrainedmodels.models.torchvision_models import pretrained_settings + +from ._base import EncoderMixin + + +class ResNetEncoder(ResNet, EncoderMixin): + def __init__(self, out_channels, depth=5, **kwargs): + super().__init__(**kwargs) + self._depth = depth + self._out_channels = out_channels + self._in_channels = 3 + + del self.fc + del self.avgpool + + def get_stages(self): + return [ + nn.Identity(), + nn.Sequential(self.conv1, self.bn1, self.relu), + nn.Sequential(self.maxpool, self.layer1), + self.layer2, + self.layer3, + self.layer4, + ] + + def forward(self, x): + stages = self.get_stages() + + features = [] + for i in range(self._depth + 1): + x = stages[i](x) + features.append(x) + + return features + + def load_state_dict(self, state_dict, **kwargs): + state_dict.pop("fc.bias", None) + state_dict.pop("fc.weight", None) + super().load_state_dict(state_dict, **kwargs) + + +new_settings = { + "resnet18": { + "ssl": "https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_supervised_resnet18-d92f0530.pth", # noqa + "swsl": "https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_weakly_supervised_resnet18-118f1556.pth", # noqa + }, + "resnet50": { + "ssl": "https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_supervised_resnet50-08389792.pth", # noqa + "swsl": "https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_weakly_supervised_resnet50-16a12f1b.pth", # noqa + }, + "resnext50_32x4d": { + "imagenet": "https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth", + "ssl": "https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_supervised_resnext50_32x4-ddb3e555.pth", # noqa + "swsl": "https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_weakly_supervised_resnext50_32x4-72679e44.pth", # noqa + }, + "resnext101_32x4d": { + "ssl": "https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_supervised_resnext101_32x4-dc43570a.pth", # noqa + "swsl": "https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_weakly_supervised_resnext101_32x4-3f87e46b.pth", # noqa + }, + "resnext101_32x8d": { + "imagenet": "https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth", + "instagram": "https://download.pytorch.org/models/ig_resnext101_32x8-c38310e5.pth", + "ssl": "https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_supervised_resnext101_32x8-2cfe2f8b.pth", # noqa + "swsl": "https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_weakly_supervised_resnext101_32x8-b4712904.pth", # noqa + }, + "resnext101_32x16d": { + "instagram": "https://download.pytorch.org/models/ig_resnext101_32x16-c6f796b0.pth", + "ssl": 
"https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_supervised_resnext101_32x16-15fffa57.pth", # noqa + "swsl": "https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_weakly_supervised_resnext101_32x16-f3559a9c.pth", # noqa + }, + "resnext101_32x32d": { + "instagram": "https://download.pytorch.org/models/ig_resnext101_32x32-e4b90b00.pth" + }, + "resnext101_32x48d": { + "instagram": "https://download.pytorch.org/models/ig_resnext101_32x48-3e41cc8a.pth" + }, +} + +pretrained_settings = deepcopy(pretrained_settings) +for model_name, sources in new_settings.items(): + if model_name not in pretrained_settings: + pretrained_settings[model_name] = {} + + for source_name, source_url in sources.items(): + pretrained_settings[model_name][source_name] = { + "url": source_url, + "input_size": [3, 224, 224], + "input_range": [0, 1], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "num_classes": 1000, + } + + +resnet_encoders = { + "resnet18": { + "encoder": ResNetEncoder, + "pretrained_settings": pretrained_settings["resnet18"], + "params": { + "out_channels": (3, 64, 64, 128, 256, 512), + "block": BasicBlock, + "layers": [2, 2, 2, 2], + }, + }, + "resnet34": { + "encoder": ResNetEncoder, + "pretrained_settings": pretrained_settings["resnet34"], + "params": { + "out_channels": (3, 64, 64, 128, 256, 512), + "block": BasicBlock, + "layers": [3, 4, 6, 3], + }, + }, + "resnet50": { + "encoder": ResNetEncoder, + "pretrained_settings": pretrained_settings["resnet50"], + "params": { + "out_channels": (3, 64, 256, 512, 1024, 2048), + "block": Bottleneck, + "layers": [3, 4, 6, 3], + }, + }, + "resnet101": { + "encoder": ResNetEncoder, + "pretrained_settings": pretrained_settings["resnet101"], + "params": { + "out_channels": (3, 64, 256, 512, 1024, 2048), + "block": Bottleneck, + "layers": [3, 4, 23, 3], + }, + }, + "resnet152": { + "encoder": ResNetEncoder, + "pretrained_settings": pretrained_settings["resnet152"], + "params": { + "out_channels": (3, 64, 256, 512, 1024, 2048), + "block": Bottleneck, + "layers": [3, 8, 36, 3], + }, + }, + "resnext50_32x4d": { + "encoder": ResNetEncoder, + "pretrained_settings": pretrained_settings["resnext50_32x4d"], + "params": { + "out_channels": (3, 64, 256, 512, 1024, 2048), + "block": Bottleneck, + "layers": [3, 4, 6, 3], + "groups": 32, + "width_per_group": 4, + }, + }, + "resnext101_32x4d": { + "encoder": ResNetEncoder, + "pretrained_settings": pretrained_settings["resnext101_32x4d"], + "params": { + "out_channels": (3, 64, 256, 512, 1024, 2048), + "block": Bottleneck, + "layers": [3, 4, 23, 3], + "groups": 32, + "width_per_group": 4, + }, + }, + "resnext101_32x8d": { + "encoder": ResNetEncoder, + "pretrained_settings": pretrained_settings["resnext101_32x8d"], + "params": { + "out_channels": (3, 64, 256, 512, 1024, 2048), + "block": Bottleneck, + "layers": [3, 4, 23, 3], + "groups": 32, + "width_per_group": 8, + }, + }, + "resnext101_32x16d": { + "encoder": ResNetEncoder, + "pretrained_settings": pretrained_settings["resnext101_32x16d"], + "params": { + "out_channels": (3, 64, 256, 512, 1024, 2048), + "block": Bottleneck, + "layers": [3, 4, 23, 3], + "groups": 32, + "width_per_group": 16, + }, + }, + "resnext101_32x32d": { + "encoder": ResNetEncoder, + "pretrained_settings": pretrained_settings["resnext101_32x32d"], + "params": { + "out_channels": (3, 64, 256, 512, 1024, 2048), + "block": Bottleneck, + "layers": [3, 4, 23, 3], + "groups": 32, + "width_per_group": 32, + }, + }, + "resnext101_32x48d": { + "encoder": 
ResNetEncoder, + "pretrained_settings": pretrained_settings["resnext101_32x48d"], + "params": { + "out_channels": (3, 64, 256, 512, 1024, 2048), + "block": Bottleneck, + "layers": [3, 4, 23, 3], + "groups": 32, + "width_per_group": 48, + }, + }, +} diff --git a/feature_extractor_models/encoders/senet.py b/feature_extractor_models/encoders/senet.py new file mode 100644 index 0000000000000000000000000000000000000000..8e0f6fd851f07a0eed93146d63c39dad250de151 --- /dev/null +++ b/feature_extractor_models/encoders/senet.py @@ -0,0 +1,174 @@ +"""Each encoder should have following attributes and methods and be inherited from `_base.EncoderMixin` + +Attributes: + + _out_channels (list of int): specify number of channels for each encoder feature tensor + _depth (int): specify number of stages in decoder (in other words number of downsampling operations) + _in_channels (int): default number of input channels in first Conv2d layer for encoder (usually 3) + +Methods: + + forward(self, x: torch.Tensor) + produce list of features of different spatial resolutions, each feature is a 4D torch.tensor of + shape NCHW (features should be sorted in descending order according to spatial resolution, starting + with resolution same as input `x` tensor). + + Input: `x` with shape (1, 3, 64, 64) + Output: [f0, f1, f2, f3, f4, f5] - features with corresponding shapes + [(1, 3, 64, 64), (1, 64, 32, 32), (1, 128, 16, 16), (1, 256, 8, 8), + (1, 512, 4, 4), (1, 1024, 2, 2)] (C - dim may differ) + + also should support number of features according to specified depth, e.g. if depth = 5, + number of feature tensors = 6 (one with same resolution as input and 5 downsampled), + depth = 3 -> number of feature tensors = 4 (one with same resolution as input and 3 downsampled). +""" + +import torch.nn as nn + +from pretrainedmodels.models.senet import ( + SENet, + SEBottleneck, + SEResNetBottleneck, + SEResNeXtBottleneck, + pretrained_settings, +) +from ._base import EncoderMixin + + +class SENetEncoder(SENet, EncoderMixin): + def __init__(self, out_channels, depth=5, **kwargs): + super().__init__(**kwargs) + + self._out_channels = out_channels + self._depth = depth + self._in_channels = 3 + + del self.last_linear + del self.avg_pool + + def get_stages(self): + return [ + nn.Identity(), + self.layer0[:-1], + nn.Sequential(self.layer0[-1], self.layer1), + self.layer2, + self.layer3, + self.layer4, + ] + + def forward(self, x): + stages = self.get_stages() + + features = [] + for i in range(self._depth + 1): + x = stages[i](x) + features.append(x) + + return features + + def load_state_dict(self, state_dict, **kwargs): + state_dict.pop("last_linear.bias", None) + state_dict.pop("last_linear.weight", None) + super().load_state_dict(state_dict, **kwargs) + + +senet_encoders = { + "senet154": { + "encoder": SENetEncoder, + "pretrained_settings": pretrained_settings["senet154"], + "params": { + "out_channels": (3, 128, 256, 512, 1024, 2048), + "block": SEBottleneck, + "dropout_p": 0.2, + "groups": 64, + "layers": [3, 8, 36, 3], + "num_classes": 1000, + "reduction": 16, + }, + }, + "se_resnet50": { + "encoder": SENetEncoder, + "pretrained_settings": pretrained_settings["se_resnet50"], + "params": { + "out_channels": (3, 64, 256, 512, 1024, 2048), + "block": SEResNetBottleneck, + "layers": [3, 4, 6, 3], + "downsample_kernel_size": 1, + "downsample_padding": 0, + "dropout_p": None, + "groups": 1, + "inplanes": 64, + "input_3x3": False, + "num_classes": 1000, + "reduction": 16, + }, + }, + "se_resnet101": { + "encoder": SENetEncoder, + 
"pretrained_settings": pretrained_settings["se_resnet101"], + "params": { + "out_channels": (3, 64, 256, 512, 1024, 2048), + "block": SEResNetBottleneck, + "layers": [3, 4, 23, 3], + "downsample_kernel_size": 1, + "downsample_padding": 0, + "dropout_p": None, + "groups": 1, + "inplanes": 64, + "input_3x3": False, + "num_classes": 1000, + "reduction": 16, + }, + }, + "se_resnet152": { + "encoder": SENetEncoder, + "pretrained_settings": pretrained_settings["se_resnet152"], + "params": { + "out_channels": (3, 64, 256, 512, 1024, 2048), + "block": SEResNetBottleneck, + "layers": [3, 8, 36, 3], + "downsample_kernel_size": 1, + "downsample_padding": 0, + "dropout_p": None, + "groups": 1, + "inplanes": 64, + "input_3x3": False, + "num_classes": 1000, + "reduction": 16, + }, + }, + "se_resnext50_32x4d": { + "encoder": SENetEncoder, + "pretrained_settings": pretrained_settings["se_resnext50_32x4d"], + "params": { + "out_channels": (3, 64, 256, 512, 1024, 2048), + "block": SEResNeXtBottleneck, + "layers": [3, 4, 6, 3], + "downsample_kernel_size": 1, + "downsample_padding": 0, + "dropout_p": None, + "groups": 32, + "inplanes": 64, + "input_3x3": False, + "num_classes": 1000, + "reduction": 16, + }, + }, + "se_resnext101_32x4d": { + "encoder": SENetEncoder, + "pretrained_settings": pretrained_settings["se_resnext101_32x4d"], + "params": { + "out_channels": (3, 64, 256, 512, 1024, 2048), + "block": SEResNeXtBottleneck, + "layers": [3, 4, 23, 3], + "downsample_kernel_size": 1, + "downsample_padding": 0, + "dropout_p": None, + "groups": 32, + "inplanes": 64, + "input_3x3": False, + "num_classes": 1000, + "reduction": 16, + }, + }, +} diff --git a/feature_extractor_models/encoders/timm_efficientnet.py b/feature_extractor_models/encoders/timm_efficientnet.py new file mode 100644 index 0000000000000000000000000000000000000000..fc248575a44254e2a09dc2f8e4f0f26b8a5f72d9 --- /dev/null +++ b/feature_extractor_models/encoders/timm_efficientnet.py @@ -0,0 +1,456 @@ +from functools import partial + +import torch.nn as nn + +from timm.models.efficientnet import EfficientNet +from timm.models.efficientnet import decode_arch_def, round_channels, default_cfgs +from timm.layers.activations import Swish + +from ._base import EncoderMixin + + +def get_efficientnet_kwargs( + channel_multiplier=1.0, depth_multiplier=1.0, drop_rate=0.2 +): + """Create EfficientNet model. 
+ Ref impl: https://github.com/tensorflow/tpu/blob/master/models/official/efficientnet/efficientnet_model.py + Paper: https://arxiv.org/abs/1905.11946 + EfficientNet params + name: (channel_multiplier, depth_multiplier, resolution, dropout_rate) + 'efficientnet-b0': (1.0, 1.0, 224, 0.2), + 'efficientnet-b1': (1.0, 1.1, 240, 0.2), + 'efficientnet-b2': (1.1, 1.2, 260, 0.3), + 'efficientnet-b3': (1.2, 1.4, 300, 0.3), + 'efficientnet-b4': (1.4, 1.8, 380, 0.4), + 'efficientnet-b5': (1.6, 2.2, 456, 0.4), + 'efficientnet-b6': (1.8, 2.6, 528, 0.5), + 'efficientnet-b7': (2.0, 3.1, 600, 0.5), + 'efficientnet-b8': (2.2, 3.6, 672, 0.5), + 'efficientnet-l2': (4.3, 5.3, 800, 0.5), + Args: + channel_multiplier: multiplier to number of channels per layer + depth_multiplier: multiplier to number of repeats per stage + """ + arch_def = [ + ["ds_r1_k3_s1_e1_c16_se0.25"], + ["ir_r2_k3_s2_e6_c24_se0.25"], + ["ir_r2_k5_s2_e6_c40_se0.25"], + ["ir_r3_k3_s2_e6_c80_se0.25"], + ["ir_r3_k5_s1_e6_c112_se0.25"], + ["ir_r4_k5_s2_e6_c192_se0.25"], + ["ir_r1_k3_s1_e6_c320_se0.25"], + ] + model_kwargs = dict( + block_args=decode_arch_def(arch_def, depth_multiplier), + num_features=round_channels(1280, channel_multiplier, 8, None), + stem_size=32, + round_chs_fn=partial(round_channels, multiplier=channel_multiplier), + act_layer=Swish, + drop_rate=drop_rate, + drop_path_rate=0.2, + ) + return model_kwargs + + +def gen_efficientnet_lite_kwargs( + channel_multiplier=1.0, depth_multiplier=1.0, drop_rate=0.2 +): + """EfficientNet-Lite model. + + Ref impl: https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet/lite + Paper: https://arxiv.org/abs/1905.11946 + + EfficientNet params + name: (channel_multiplier, depth_multiplier, resolution, dropout_rate) + 'efficientnet-lite0': (1.0, 1.0, 224, 0.2), + 'efficientnet-lite1': (1.0, 1.1, 240, 0.2), + 'efficientnet-lite2': (1.1, 1.2, 260, 0.3), + 'efficientnet-lite3': (1.2, 1.4, 280, 0.3), + 'efficientnet-lite4': (1.4, 1.8, 300, 0.3), + + Args: + channel_multiplier: multiplier to number of channels per layer + depth_multiplier: multiplier to number of repeats per stage + """ + arch_def = [ + ["ds_r1_k3_s1_e1_c16"], + ["ir_r2_k3_s2_e6_c24"], + ["ir_r2_k5_s2_e6_c40"], + ["ir_r3_k3_s2_e6_c80"], + ["ir_r3_k5_s1_e6_c112"], + ["ir_r4_k5_s2_e6_c192"], + ["ir_r1_k3_s1_e6_c320"], + ] + model_kwargs = dict( + block_args=decode_arch_def(arch_def, depth_multiplier, fix_first_last=True), + num_features=1280, + stem_size=32, + fix_stem=True, + round_chs_fn=partial(round_channels, multiplier=channel_multiplier), + act_layer=nn.ReLU6, + drop_rate=drop_rate, + drop_path_rate=0.2, + ) + return model_kwargs + + +class EfficientNetBaseEncoder(EfficientNet, EncoderMixin): + def __init__(self, stage_idxs, out_channels, depth=5, **kwargs): + super().__init__(**kwargs) + + self._stage_idxs = stage_idxs + self._out_channels = out_channels + self._depth = depth + self._in_channels = 3 + + del self.classifier + + def get_stages(self): + return [ + nn.Identity(), + nn.Sequential(self.conv_stem, self.bn1), + self.blocks[: self._stage_idxs[0]], + self.blocks[self._stage_idxs[0] : self._stage_idxs[1]], + self.blocks[self._stage_idxs[1] : self._stage_idxs[2]], + self.blocks[self._stage_idxs[2] :], + ] + + def forward(self, x): + stages = self.get_stages() + + features = [] + for i in range(self._depth + 1): + x = stages[i](x) + features.append(x) + + return features + + def load_state_dict(self, state_dict, **kwargs): + state_dict.pop("classifier.bias", None) + state_dict.pop("classifier.weight", 
None) + super().load_state_dict(state_dict, **kwargs) + + +class EfficientNetEncoder(EfficientNetBaseEncoder): + def __init__( + self, + stage_idxs, + out_channels, + depth=5, + channel_multiplier=1.0, + depth_multiplier=1.0, + drop_rate=0.2, + ): + kwargs = get_efficientnet_kwargs( + channel_multiplier, depth_multiplier, drop_rate + ) + super().__init__(stage_idxs, out_channels, depth, **kwargs) + + +class EfficientNetLiteEncoder(EfficientNetBaseEncoder): + def __init__( + self, + stage_idxs, + out_channels, + depth=5, + channel_multiplier=1.0, + depth_multiplier=1.0, + drop_rate=0.2, + ): + kwargs = gen_efficientnet_lite_kwargs( + channel_multiplier, depth_multiplier, drop_rate + ) + super().__init__(stage_idxs, out_channels, depth, **kwargs) + + +def prepare_settings(settings): + return { + "mean": settings.mean, + "std": settings.std, + "url": settings.url, + "input_range": (0, 1), + "input_space": "RGB", + } + + +timm_efficientnet_encoders = { + "timm-efficientnet-b0": { + "encoder": EfficientNetEncoder, + "pretrained_settings": { + "imagenet": prepare_settings( + default_cfgs["tf_efficientnet_b0"].cfgs["in1k"] + ), + "advprop": prepare_settings( + default_cfgs["tf_efficientnet_b0"].cfgs["ap_in1k"] + ), + "noisy-student": prepare_settings( + default_cfgs["tf_efficientnet_b0"].cfgs["ns_jft_in1k"] + ), + }, + "params": { + "out_channels": (3, 32, 24, 40, 112, 320), + "stage_idxs": (2, 3, 5), + "channel_multiplier": 1.0, + "depth_multiplier": 1.0, + "drop_rate": 0.2, + }, + }, + "timm-efficientnet-b1": { + "encoder": EfficientNetEncoder, + "pretrained_settings": { + "imagenet": prepare_settings( + default_cfgs["tf_efficientnet_b1"].cfgs["in1k"] + ), + "advprop": prepare_settings( + default_cfgs["tf_efficientnet_b1"].cfgs["ap_in1k"] + ), + "noisy-student": prepare_settings( + default_cfgs["tf_efficientnet_b1"].cfgs["ns_jft_in1k"] + ), + }, + "params": { + "out_channels": (3, 32, 24, 40, 112, 320), + "stage_idxs": (2, 3, 5), + "channel_multiplier": 1.0, + "depth_multiplier": 1.1, + "drop_rate": 0.2, + }, + }, + "timm-efficientnet-b2": { + "encoder": EfficientNetEncoder, + "pretrained_settings": { + "imagenet": prepare_settings( + default_cfgs["tf_efficientnet_b2"].cfgs["in1k"] + ), + "advprop": prepare_settings( + default_cfgs["tf_efficientnet_b2"].cfgs["ap_in1k"] + ), + "noisy-student": prepare_settings( + default_cfgs["tf_efficientnet_b2"].cfgs["ns_jft_in1k"] + ), + }, + "params": { + "out_channels": (3, 32, 24, 48, 120, 352), + "stage_idxs": (2, 3, 5), + "channel_multiplier": 1.1, + "depth_multiplier": 1.2, + "drop_rate": 0.3, + }, + }, + "timm-efficientnet-b3": { + "encoder": EfficientNetEncoder, + "pretrained_settings": { + "imagenet": prepare_settings( + default_cfgs["tf_efficientnet_b3"].cfgs["in1k"] + ), + "advprop": prepare_settings( + default_cfgs["tf_efficientnet_b3"].cfgs["ap_in1k"] + ), + "noisy-student": prepare_settings( + default_cfgs["tf_efficientnet_b3"].cfgs["ns_jft_in1k"] + ), + }, + "params": { + "out_channels": (3, 40, 32, 48, 136, 384), + "stage_idxs": (2, 3, 5), + "channel_multiplier": 1.2, + "depth_multiplier": 1.4, + "drop_rate": 0.3, + }, + }, + "timm-efficientnet-b4": { + "encoder": EfficientNetEncoder, + "pretrained_settings": { + "imagenet": prepare_settings( + default_cfgs["tf_efficientnet_b4"].cfgs["in1k"] + ), + "advprop": prepare_settings( + default_cfgs["tf_efficientnet_b4"].cfgs["ap_in1k"] + ), + "noisy-student": prepare_settings( + default_cfgs["tf_efficientnet_b4"].cfgs["ns_jft_in1k"] + ), + }, + "params": { + "out_channels": (3, 48, 32, 56, 160, 448), 
+ "stage_idxs": (2, 3, 5), + "channel_multiplier": 1.4, + "depth_multiplier": 1.8, + "drop_rate": 0.4, + }, + }, + "timm-efficientnet-b5": { + "encoder": EfficientNetEncoder, + "pretrained_settings": { + "imagenet": prepare_settings( + default_cfgs["tf_efficientnet_b5"].cfgs["in1k"] + ), + "advprop": prepare_settings( + default_cfgs["tf_efficientnet_b5"].cfgs["ap_in1k"] + ), + "noisy-student": prepare_settings( + default_cfgs["tf_efficientnet_b5"].cfgs["ns_jft_in1k"] + ), + }, + "params": { + "out_channels": (3, 48, 40, 64, 176, 512), + "stage_idxs": (2, 3, 5), + "channel_multiplier": 1.6, + "depth_multiplier": 2.2, + "drop_rate": 0.4, + }, + }, + "timm-efficientnet-b6": { + "encoder": EfficientNetEncoder, + "pretrained_settings": { + "imagenet": prepare_settings( + default_cfgs["tf_efficientnet_b6"].cfgs["aa_in1k"] + ), + "advprop": prepare_settings( + default_cfgs["tf_efficientnet_b6"].cfgs["ap_in1k"] + ), + "noisy-student": prepare_settings( + default_cfgs["tf_efficientnet_b6"].cfgs["ns_jft_in1k"] + ), + }, + "params": { + "out_channels": (3, 56, 40, 72, 200, 576), + "stage_idxs": (2, 3, 5), + "channel_multiplier": 1.8, + "depth_multiplier": 2.6, + "drop_rate": 0.5, + }, + }, + "timm-efficientnet-b7": { + "encoder": EfficientNetEncoder, + "pretrained_settings": { + "imagenet": prepare_settings( + default_cfgs["tf_efficientnet_b7"].cfgs["aa_in1k"] + ), + "advprop": prepare_settings( + default_cfgs["tf_efficientnet_b7"].cfgs["ap_in1k"] + ), + "noisy-student": prepare_settings( + default_cfgs["tf_efficientnet_b7"].cfgs["ns_jft_in1k"] + ), + }, + "params": { + "out_channels": (3, 64, 48, 80, 224, 640), + "stage_idxs": (2, 3, 5), + "channel_multiplier": 2.0, + "depth_multiplier": 3.1, + "drop_rate": 0.5, + }, + }, + "timm-efficientnet-b8": { + "encoder": EfficientNetEncoder, + "pretrained_settings": { + "imagenet": prepare_settings( + default_cfgs["tf_efficientnet_b8"].cfgs["ra_in1k"] + ), + "advprop": prepare_settings( + default_cfgs["tf_efficientnet_b8"].cfgs["ap_in1k"] + ), + }, + "params": { + "out_channels": (3, 72, 56, 88, 248, 704), + "stage_idxs": (2, 3, 5), + "channel_multiplier": 2.2, + "depth_multiplier": 3.6, + "drop_rate": 0.5, + }, + }, + "timm-efficientnet-l2": { + "encoder": EfficientNetEncoder, + "pretrained_settings": { + "noisy-student": prepare_settings( + default_cfgs["tf_efficientnet_l2"].cfgs["ns_jft_in1k"] + ), + "noisy-student-475": prepare_settings( + default_cfgs["tf_efficientnet_l2"].cfgs["ns_jft_in1k_475"] + ), + }, + "params": { + "out_channels": (3, 136, 104, 176, 480, 1376), + "stage_idxs": (2, 3, 5), + "channel_multiplier": 4.3, + "depth_multiplier": 5.3, + "drop_rate": 0.5, + }, + }, + "timm-tf_efficientnet_lite0": { + "encoder": EfficientNetLiteEncoder, + "pretrained_settings": { + "imagenet": prepare_settings( + default_cfgs["tf_efficientnet_lite0"].cfgs["in1k"] + ) + }, + "params": { + "out_channels": (3, 32, 24, 40, 112, 320), + "stage_idxs": (2, 3, 5), + "channel_multiplier": 1.0, + "depth_multiplier": 1.0, + "drop_rate": 0.2, + }, + }, + "timm-tf_efficientnet_lite1": { + "encoder": EfficientNetLiteEncoder, + "pretrained_settings": { + "imagenet": prepare_settings( + default_cfgs["tf_efficientnet_lite1"].cfgs["in1k"] + ) + }, + "params": { + "out_channels": (3, 32, 24, 40, 112, 320), + "stage_idxs": (2, 3, 5), + "channel_multiplier": 1.0, + "depth_multiplier": 1.1, + "drop_rate": 0.2, + }, + }, + "timm-tf_efficientnet_lite2": { + "encoder": EfficientNetLiteEncoder, + "pretrained_settings": { + "imagenet": prepare_settings( + 
default_cfgs["tf_efficientnet_lite2"].cfgs["in1k"] + ) + }, + "params": { + "out_channels": (3, 32, 24, 48, 120, 352), + "stage_idxs": (2, 3, 5), + "channel_multiplier": 1.1, + "depth_multiplier": 1.2, + "drop_rate": 0.3, + }, + }, + "timm-tf_efficientnet_lite3": { + "encoder": EfficientNetLiteEncoder, + "pretrained_settings": { + "imagenet": prepare_settings( + default_cfgs["tf_efficientnet_lite3"].cfgs["in1k"] + ) + }, + "params": { + "out_channels": (3, 32, 32, 48, 136, 384), + "stage_idxs": (2, 3, 5), + "channel_multiplier": 1.2, + "depth_multiplier": 1.4, + "drop_rate": 0.3, + }, + }, + "timm-tf_efficientnet_lite4": { + "encoder": EfficientNetLiteEncoder, + "pretrained_settings": { + "imagenet": prepare_settings( + default_cfgs["tf_efficientnet_lite4"].cfgs["in1k"] + ) + }, + "params": { + "out_channels": (3, 32, 32, 56, 160, 448), + "stage_idxs": (2, 3, 5), + "channel_multiplier": 1.4, + "depth_multiplier": 1.8, + "drop_rate": 0.4, + }, + }, +} diff --git a/feature_extractor_models/encoders/timm_gernet.py b/feature_extractor_models/encoders/timm_gernet.py new file mode 100644 index 0000000000000000000000000000000000000000..e0c3354d8043ea5a6e69053050485fcf0f66f2b1 --- /dev/null +++ b/feature_extractor_models/encoders/timm_gernet.py @@ -0,0 +1,124 @@ +from timm.models import ByoModelCfg, ByoBlockCfg, ByobNet + +from ._base import EncoderMixin +import torch.nn as nn + + +class GERNetEncoder(ByobNet, EncoderMixin): + def __init__(self, out_channels, depth=5, **kwargs): + super().__init__(**kwargs) + self._depth = depth + self._out_channels = out_channels + self._in_channels = 3 + + del self.head + + def get_stages(self): + return [ + nn.Identity(), + self.stem, + self.stages[0], + self.stages[1], + self.stages[2], + nn.Sequential(self.stages[3], self.stages[4], self.final_conv), + ] + + def forward(self, x): + stages = self.get_stages() + + features = [] + for i in range(self._depth + 1): + x = stages[i](x) + features.append(x) + + return features + + def load_state_dict(self, state_dict, **kwargs): + state_dict.pop("head.fc.weight", None) + state_dict.pop("head.fc.bias", None) + super().load_state_dict(state_dict, **kwargs) + + +regnet_weights = { + "timm-gernet_s": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-ger-weights/gernet_s-756b4751.pth" # noqa + }, + "timm-gernet_m": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-ger-weights/gernet_m-0873c53a.pth" # noqa + }, + "timm-gernet_l": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-ger-weights/gernet_l-f31e2e8d.pth" # noqa + }, +} + +pretrained_settings = {} +for model_name, sources in regnet_weights.items(): + pretrained_settings[model_name] = {} + for source_name, source_url in sources.items(): + pretrained_settings[model_name][source_name] = { + "url": source_url, + "input_range": [0, 1], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "num_classes": 1000, + } + +timm_gernet_encoders = { + "timm-gernet_s": { + "encoder": GERNetEncoder, + "pretrained_settings": pretrained_settings["timm-gernet_s"], + "params": { + "out_channels": (3, 13, 48, 48, 384, 1920), + "cfg": ByoModelCfg( + blocks=( + ByoBlockCfg(type="basic", d=1, c=48, s=2, gs=0, br=1.0), + ByoBlockCfg(type="basic", d=3, c=48, s=2, gs=0, br=1.0), + ByoBlockCfg(type="bottle", d=7, c=384, s=2, gs=0, br=1 / 4), + ByoBlockCfg(type="bottle", d=2, c=560, s=2, gs=1, br=3.0), + ByoBlockCfg(type="bottle", d=1, c=256, s=1, gs=1, br=3.0), 
+ ), + stem_chs=13, + stem_pool=None, + num_features=1920, + ), + }, + }, + "timm-gernet_m": { + "encoder": GERNetEncoder, + "pretrained_settings": pretrained_settings["timm-gernet_m"], + "params": { + "out_channels": (3, 32, 128, 192, 640, 2560), + "cfg": ByoModelCfg( + blocks=( + ByoBlockCfg(type="basic", d=1, c=128, s=2, gs=0, br=1.0), + ByoBlockCfg(type="basic", d=2, c=192, s=2, gs=0, br=1.0), + ByoBlockCfg(type="bottle", d=6, c=640, s=2, gs=0, br=1 / 4), + ByoBlockCfg(type="bottle", d=4, c=640, s=2, gs=1, br=3.0), + ByoBlockCfg(type="bottle", d=1, c=640, s=1, gs=1, br=3.0), + ), + stem_chs=32, + stem_pool=None, + num_features=2560, + ), + }, + }, + "timm-gernet_l": { + "encoder": GERNetEncoder, + "pretrained_settings": pretrained_settings["timm-gernet_l"], + "params": { + "out_channels": (3, 32, 128, 192, 640, 2560), + "cfg": ByoModelCfg( + blocks=( + ByoBlockCfg(type="basic", d=1, c=128, s=2, gs=0, br=1.0), + ByoBlockCfg(type="basic", d=2, c=192, s=2, gs=0, br=1.0), + ByoBlockCfg(type="bottle", d=6, c=640, s=2, gs=0, br=1 / 4), + ByoBlockCfg(type="bottle", d=5, c=640, s=2, gs=1, br=3.0), + ByoBlockCfg(type="bottle", d=4, c=640, s=1, gs=1, br=3.0), + ), + stem_chs=32, + stem_pool=None, + num_features=2560, + ), + }, + }, +} diff --git a/feature_extractor_models/encoders/timm_mobilenetv3.py b/feature_extractor_models/encoders/timm_mobilenetv3.py new file mode 100644 index 0000000000000000000000000000000000000000..ff733ab9c6dd03c8e4a887b46238b68901231e59 --- /dev/null +++ b/feature_extractor_models/encoders/timm_mobilenetv3.py @@ -0,0 +1,151 @@ +import timm +import numpy as np +import torch.nn as nn + +from ._base import EncoderMixin + + +def _make_divisible(x, divisible_by=8): + return int(np.ceil(x * 1.0 / divisible_by) * divisible_by) + + +class MobileNetV3Encoder(nn.Module, EncoderMixin): + def __init__(self, model_name, width_mult, depth=5, **kwargs): + super().__init__() + if "large" not in model_name and "small" not in model_name: + raise ValueError("MobileNetV3 wrong model name {}".format(model_name)) + + self._mode = "small" if "small" in model_name else "large" + self._depth = depth + self._out_channels = self._get_channels(self._mode, width_mult) + self._in_channels = 3 + + # minimal models replace hardswish with relu + self.model = timm.create_model( + model_name=model_name, + scriptable=True, # torch.jit scriptable + exportable=True, # onnx export + features_only=True, + ) + + def _get_channels(self, mode, width_mult): + if mode == "small": + channels = [16, 16, 24, 48, 576] + else: + channels = [16, 24, 40, 112, 960] + channels = [3] + [_make_divisible(x * width_mult) for x in channels] + return tuple(channels) + + def get_stages(self): + if self._mode == "small": + return [ + nn.Identity(), + nn.Sequential(self.model.conv_stem, self.model.bn1, self.model.act1), + self.model.blocks[0], + self.model.blocks[1], + self.model.blocks[2:4], + self.model.blocks[4:], + ] + elif self._mode == "large": + return [ + nn.Identity(), + nn.Sequential( + self.model.conv_stem, + self.model.bn1, + self.model.act1, + self.model.blocks[0], + ), + self.model.blocks[1], + self.model.blocks[2], + self.model.blocks[3:5], + self.model.blocks[5:], + ] + else: + ValueError( + "MobileNetV3 mode should be small or large, got {}".format(self._mode) + ) + + def forward(self, x): + stages = self.get_stages() + + features = [] + for i in range(self._depth + 1): + x = stages[i](x) + features.append(x) + + return features + + def load_state_dict(self, state_dict, **kwargs): + 
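+ # The ImageNet checkpoints carry a classification head (conv_head / classifier) that this + # features-only wrapper does not keep, so those keys are dropped before loading into self.model.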
state_dict.pop("conv_head.weight", None) + state_dict.pop("conv_head.bias", None) + state_dict.pop("classifier.weight", None) + state_dict.pop("classifier.bias", None) + self.model.load_state_dict(state_dict, **kwargs) + + +mobilenetv3_weights = { + "tf_mobilenetv3_large_075": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_mobilenetv3_large_075-150ee8b0.pth" # noqa + }, + "tf_mobilenetv3_large_100": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_mobilenetv3_large_100-427764d5.pth" # noqa + }, + "tf_mobilenetv3_large_minimal_100": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_mobilenetv3_large_minimal_100-8596ae28.pth" # noqa + }, + "tf_mobilenetv3_small_075": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_mobilenetv3_small_075-da427f52.pth" # noqa + }, + "tf_mobilenetv3_small_100": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_mobilenetv3_small_100-37f49e2b.pth" # noqa + }, + "tf_mobilenetv3_small_minimal_100": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_mobilenetv3_small_minimal_100-922a7843.pth" # noqa + }, +} + +pretrained_settings = {} +for model_name, sources in mobilenetv3_weights.items(): + pretrained_settings[model_name] = {} + for source_name, source_url in sources.items(): + pretrained_settings[model_name][source_name] = { + "url": source_url, + "input_range": [0, 1], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "input_space": "RGB", + } + + +timm_mobilenetv3_encoders = { + "timm-mobilenetv3_large_075": { + "encoder": MobileNetV3Encoder, + "pretrained_settings": pretrained_settings["tf_mobilenetv3_large_075"], + "params": {"model_name": "tf_mobilenetv3_large_075", "width_mult": 0.75}, + }, + "timm-mobilenetv3_large_100": { + "encoder": MobileNetV3Encoder, + "pretrained_settings": pretrained_settings["tf_mobilenetv3_large_100"], + "params": {"model_name": "tf_mobilenetv3_large_100", "width_mult": 1.0}, + }, + "timm-mobilenetv3_large_minimal_100": { + "encoder": MobileNetV3Encoder, + "pretrained_settings": pretrained_settings["tf_mobilenetv3_large_minimal_100"], + "params": {"model_name": "tf_mobilenetv3_large_minimal_100", "width_mult": 1.0}, + }, + "timm-mobilenetv3_small_075": { + "encoder": MobileNetV3Encoder, + "pretrained_settings": pretrained_settings["tf_mobilenetv3_small_075"], + "params": {"model_name": "tf_mobilenetv3_small_075", "width_mult": 0.75}, + }, + "timm-mobilenetv3_small_100": { + "encoder": MobileNetV3Encoder, + "pretrained_settings": pretrained_settings["tf_mobilenetv3_small_100"], + "params": {"model_name": "tf_mobilenetv3_small_100", "width_mult": 1.0}, + }, + "timm-mobilenetv3_small_minimal_100": { + "encoder": MobileNetV3Encoder, + "pretrained_settings": pretrained_settings["tf_mobilenetv3_small_minimal_100"], + "params": {"model_name": "tf_mobilenetv3_small_minimal_100", "width_mult": 1.0}, + }, +} diff --git a/feature_extractor_models/encoders/timm_regnet.py b/feature_extractor_models/encoders/timm_regnet.py new file mode 100644 index 0000000000000000000000000000000000000000..cc60b8ba9c64d7ed7ebfefc66a7ad296c74c11e9 --- /dev/null +++ b/feature_extractor_models/encoders/timm_regnet.py @@ -0,0 +1,350 @@ +from ._base import EncoderMixin +from timm.models.regnet import RegNet, RegNetCfg +import torch.nn as nn + + 
+class RegNetEncoder(RegNet, EncoderMixin): + def __init__(self, out_channels, depth=5, **kwargs): + kwargs["cfg"] = RegNetCfg(**kwargs["cfg"]) + super().__init__(**kwargs) + self._depth = depth + self._out_channels = out_channels + self._in_channels = 3 + + del self.head + + def get_stages(self): + return [nn.Identity(), self.stem, self.s1, self.s2, self.s3, self.s4] + + def forward(self, x): + stages = self.get_stages() + + features = [] + for i in range(self._depth + 1): + x = stages[i](x) + features.append(x) + + return features + + def load_state_dict(self, state_dict, **kwargs): + state_dict.pop("head.fc.weight", None) + state_dict.pop("head.fc.bias", None) + super().load_state_dict(state_dict, **kwargs) + + +regnet_weights = { + "timm-regnetx_002": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_002-e7e85e5c.pth" # noqa + }, + "timm-regnetx_004": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_004-7d0e9424.pth" # noqa + }, + "timm-regnetx_006": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_006-85ec1baa.pth" # noqa + }, + "timm-regnetx_008": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_008-d8b470eb.pth" # noqa + }, + "timm-regnetx_016": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_016-65ca972a.pth" # noqa + }, + "timm-regnetx_032": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_032-ed0c7f7e.pth" # noqa + }, + "timm-regnetx_040": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_040-73c2a654.pth" # noqa + }, + "timm-regnetx_064": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_064-29278baa.pth" # noqa + }, + "timm-regnetx_080": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_080-7c7fcab1.pth" # noqa + }, + "timm-regnetx_120": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_120-65d5521e.pth" # noqa + }, + "timm-regnetx_160": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_160-c98c4112.pth" # noqa + }, + "timm-regnetx_320": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_320-8ea38b93.pth" # noqa + }, + "timm-regnety_002": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_002-e68ca334.pth" # noqa + }, + "timm-regnety_004": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_004-0db870e6.pth" # noqa + }, + "timm-regnety_006": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_006-c67e57ec.pth" # noqa + }, + "timm-regnety_008": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_008-dc900dbe.pth" # noqa + }, + "timm-regnety_016": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_016-54367f74.pth" # noqa + }, + "timm-regnety_032": { + "imagenet": 
"https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/regnety_032_ra-7f2439f9.pth" # noqa + }, + "timm-regnety_040": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_040-f0d569f9.pth" # noqa + }, + "timm-regnety_064": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_064-0a48325c.pth" # noqa + }, + "timm-regnety_080": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_080-e7f3eb93.pth" # noqa + }, + "timm-regnety_120": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_120-721ba79a.pth" # noqa + }, + "timm-regnety_160": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_160-d64013cd.pth" # noqa + }, + "timm-regnety_320": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_320-ba464b29.pth" # noqa + }, +} + +pretrained_settings = {} +for model_name, sources in regnet_weights.items(): + pretrained_settings[model_name] = {} + for source_name, source_url in sources.items(): + pretrained_settings[model_name][source_name] = { + "url": source_url, + "input_size": [3, 224, 224], + "input_range": [0, 1], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "num_classes": 1000, + } + +# at this point I am too lazy to copy configs, so I just used the same configs from timm's repo + + +def _mcfg(**kwargs): + cfg = dict(se_ratio=0.0, bottle_ratio=1.0, stem_width=32) + cfg.update(**kwargs) + return cfg + + +timm_regnet_encoders = { + "timm-regnetx_002": { + "encoder": RegNetEncoder, + "pretrained_settings": pretrained_settings["timm-regnetx_002"], + "params": { + "out_channels": (3, 32, 24, 56, 152, 368), + "cfg": _mcfg(w0=24, wa=36.44, wm=2.49, group_size=8, depth=13), + }, + }, + "timm-regnetx_004": { + "encoder": RegNetEncoder, + "pretrained_settings": pretrained_settings["timm-regnetx_004"], + "params": { + "out_channels": (3, 32, 32, 64, 160, 384), + "cfg": _mcfg(w0=24, wa=24.48, wm=2.54, group_size=16, depth=22), + }, + }, + "timm-regnetx_006": { + "encoder": RegNetEncoder, + "pretrained_settings": pretrained_settings["timm-regnetx_006"], + "params": { + "out_channels": (3, 32, 48, 96, 240, 528), + "cfg": _mcfg(w0=48, wa=36.97, wm=2.24, group_size=24, depth=16), + }, + }, + "timm-regnetx_008": { + "encoder": RegNetEncoder, + "pretrained_settings": pretrained_settings["timm-regnetx_008"], + "params": { + "out_channels": (3, 32, 64, 128, 288, 672), + "cfg": _mcfg(w0=56, wa=35.73, wm=2.28, group_size=16, depth=16), + }, + }, + "timm-regnetx_016": { + "encoder": RegNetEncoder, + "pretrained_settings": pretrained_settings["timm-regnetx_016"], + "params": { + "out_channels": (3, 32, 72, 168, 408, 912), + "cfg": _mcfg(w0=80, wa=34.01, wm=2.25, group_size=24, depth=18), + }, + }, + "timm-regnetx_032": { + "encoder": RegNetEncoder, + "pretrained_settings": pretrained_settings["timm-regnetx_032"], + "params": { + "out_channels": (3, 32, 96, 192, 432, 1008), + "cfg": _mcfg(w0=88, wa=26.31, wm=2.25, group_size=48, depth=25), + }, + }, + "timm-regnetx_040": { + "encoder": RegNetEncoder, + "pretrained_settings": pretrained_settings["timm-regnetx_040"], + "params": { + "out_channels": (3, 32, 80, 240, 560, 1360), + "cfg": _mcfg(w0=96, wa=38.65, wm=2.43, group_size=40, depth=23), + }, + }, + "timm-regnetx_064": { + "encoder": RegNetEncoder, + 
"pretrained_settings": pretrained_settings["timm-regnetx_064"], + "params": { + "out_channels": (3, 32, 168, 392, 784, 1624), + "cfg": _mcfg(w0=184, wa=60.83, wm=2.07, group_size=56, depth=17), + }, + }, + "timm-regnetx_080": { + "encoder": RegNetEncoder, + "pretrained_settings": pretrained_settings["timm-regnetx_080"], + "params": { + "out_channels": (3, 32, 80, 240, 720, 1920), + "cfg": _mcfg(w0=80, wa=49.56, wm=2.88, group_size=120, depth=23), + }, + }, + "timm-regnetx_120": { + "encoder": RegNetEncoder, + "pretrained_settings": pretrained_settings["timm-regnetx_120"], + "params": { + "out_channels": (3, 32, 224, 448, 896, 2240), + "cfg": _mcfg(w0=168, wa=73.36, wm=2.37, group_size=112, depth=19), + }, + }, + "timm-regnetx_160": { + "encoder": RegNetEncoder, + "pretrained_settings": pretrained_settings["timm-regnetx_160"], + "params": { + "out_channels": (3, 32, 256, 512, 896, 2048), + "cfg": _mcfg(w0=216, wa=55.59, wm=2.1, group_size=128, depth=22), + }, + }, + "timm-regnetx_320": { + "encoder": RegNetEncoder, + "pretrained_settings": pretrained_settings["timm-regnetx_320"], + "params": { + "out_channels": (3, 32, 336, 672, 1344, 2520), + "cfg": _mcfg(w0=320, wa=69.86, wm=2.0, group_size=168, depth=23), + }, + }, + # regnety + "timm-regnety_002": { + "encoder": RegNetEncoder, + "pretrained_settings": pretrained_settings["timm-regnety_002"], + "params": { + "out_channels": (3, 32, 24, 56, 152, 368), + "cfg": _mcfg( + w0=24, wa=36.44, wm=2.49, group_size=8, depth=13, se_ratio=0.25 + ), + }, + }, + "timm-regnety_004": { + "encoder": RegNetEncoder, + "pretrained_settings": pretrained_settings["timm-regnety_004"], + "params": { + "out_channels": (3, 32, 48, 104, 208, 440), + "cfg": _mcfg( + w0=48, wa=27.89, wm=2.09, group_size=8, depth=16, se_ratio=0.25 + ), + }, + }, + "timm-regnety_006": { + "encoder": RegNetEncoder, + "pretrained_settings": pretrained_settings["timm-regnety_006"], + "params": { + "out_channels": (3, 32, 48, 112, 256, 608), + "cfg": _mcfg( + w0=48, wa=32.54, wm=2.32, group_size=16, depth=15, se_ratio=0.25 + ), + }, + }, + "timm-regnety_008": { + "encoder": RegNetEncoder, + "pretrained_settings": pretrained_settings["timm-regnety_008"], + "params": { + "out_channels": (3, 32, 64, 128, 320, 768), + "cfg": _mcfg( + w0=56, wa=38.84, wm=2.4, group_size=16, depth=14, se_ratio=0.25 + ), + }, + }, + "timm-regnety_016": { + "encoder": RegNetEncoder, + "pretrained_settings": pretrained_settings["timm-regnety_016"], + "params": { + "out_channels": (3, 32, 48, 120, 336, 888), + "cfg": _mcfg( + w0=48, wa=20.71, wm=2.65, group_size=24, depth=27, se_ratio=0.25 + ), + }, + }, + "timm-regnety_032": { + "encoder": RegNetEncoder, + "pretrained_settings": pretrained_settings["timm-regnety_032"], + "params": { + "out_channels": (3, 32, 72, 216, 576, 1512), + "cfg": _mcfg( + w0=80, wa=42.63, wm=2.66, group_size=24, depth=21, se_ratio=0.25 + ), + }, + }, + "timm-regnety_040": { + "encoder": RegNetEncoder, + "pretrained_settings": pretrained_settings["timm-regnety_040"], + "params": { + "out_channels": (3, 32, 128, 192, 512, 1088), + "cfg": _mcfg( + w0=96, wa=31.41, wm=2.24, group_size=64, depth=22, se_ratio=0.25 + ), + }, + }, + "timm-regnety_064": { + "encoder": RegNetEncoder, + "pretrained_settings": pretrained_settings["timm-regnety_064"], + "params": { + "out_channels": (3, 32, 144, 288, 576, 1296), + "cfg": _mcfg( + w0=112, wa=33.22, wm=2.27, group_size=72, depth=25, se_ratio=0.25 + ), + }, + }, + "timm-regnety_080": { + "encoder": RegNetEncoder, + "pretrained_settings": 
pretrained_settings["timm-regnety_080"], + "params": { + "out_channels": (3, 32, 168, 448, 896, 2016), + "cfg": _mcfg( + w0=192, wa=76.82, wm=2.19, group_size=56, depth=17, se_ratio=0.25 + ), + }, + }, + "timm-regnety_120": { + "encoder": RegNetEncoder, + "pretrained_settings": pretrained_settings["timm-regnety_120"], + "params": { + "out_channels": (3, 32, 224, 448, 896, 2240), + "cfg": _mcfg( + w0=168, wa=73.36, wm=2.37, group_size=112, depth=19, se_ratio=0.25 + ), + }, + }, + "timm-regnety_160": { + "encoder": RegNetEncoder, + "pretrained_settings": pretrained_settings["timm-regnety_160"], + "params": { + "out_channels": (3, 32, 224, 448, 1232, 3024), + "cfg": _mcfg( + w0=200, wa=106.23, wm=2.48, group_size=112, depth=18, se_ratio=0.25 + ), + }, + }, + "timm-regnety_320": { + "encoder": RegNetEncoder, + "pretrained_settings": pretrained_settings["timm-regnety_320"], + "params": { + "out_channels": (3, 32, 232, 696, 1392, 3712), + "cfg": _mcfg( + w0=232, wa=115.89, wm=2.53, group_size=232, depth=20, se_ratio=0.25 + ), + }, + }, +} diff --git a/feature_extractor_models/encoders/timm_res2net.py b/feature_extractor_models/encoders/timm_res2net.py new file mode 100644 index 0000000000000000000000000000000000000000..e97043e357e3627de361e2f12e27b22bd3ca0d93 --- /dev/null +++ b/feature_extractor_models/encoders/timm_res2net.py @@ -0,0 +1,163 @@ +from ._base import EncoderMixin +from timm.models.resnet import ResNet +from timm.models.res2net import Bottle2neck +import torch.nn as nn + + +class Res2NetEncoder(ResNet, EncoderMixin): + def __init__(self, out_channels, depth=5, **kwargs): + super().__init__(**kwargs) + self._depth = depth + self._out_channels = out_channels + self._in_channels = 3 + + del self.fc + del self.global_pool + + def get_stages(self): + return [ + nn.Identity(), + nn.Sequential(self.conv1, self.bn1, self.act1), + nn.Sequential(self.maxpool, self.layer1), + self.layer2, + self.layer3, + self.layer4, + ] + + def make_dilated(self, *args, **kwargs): + raise ValueError("Res2Net encoders do not support dilated mode") + + def forward(self, x): + stages = self.get_stages() + + features = [] + for i in range(self._depth + 1): + x = stages[i](x) + features.append(x) + + return features + + def load_state_dict(self, state_dict, **kwargs): + state_dict.pop("fc.bias", None) + state_dict.pop("fc.weight", None) + super().load_state_dict(state_dict, **kwargs) + + +res2net_weights = { + "timm-res2net50_26w_4s": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-res2net/res2net50_26w_4s-06e79181.pth" # noqa + }, + "timm-res2net50_48w_2s": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-res2net/res2net50_48w_2s-afed724a.pth" # noqa + }, + "timm-res2net50_14w_8s": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-res2net/res2net50_14w_8s-6527dddc.pth" # noqa + }, + "timm-res2net50_26w_6s": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-res2net/res2net50_26w_6s-19041792.pth" # noqa + }, + "timm-res2net50_26w_8s": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-res2net/res2net50_26w_8s-2c7c9f12.pth" # noqa + }, + "timm-res2net101_26w_4s": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-res2net/res2net101_26w_4s-02a759a1.pth" # noqa + }, + "timm-res2next50": { + "imagenet": 
"https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-res2net/res2next50_4s-6ef7e7bf.pth" # noqa + }, +} + +pretrained_settings = {} +for model_name, sources in res2net_weights.items(): + pretrained_settings[model_name] = {} + for source_name, source_url in sources.items(): + pretrained_settings[model_name][source_name] = { + "url": source_url, + "input_size": [3, 224, 224], + "input_range": [0, 1], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "num_classes": 1000, + } + + +timm_res2net_encoders = { + "timm-res2net50_26w_4s": { + "encoder": Res2NetEncoder, + "pretrained_settings": pretrained_settings["timm-res2net50_26w_4s"], + "params": { + "out_channels": (3, 64, 256, 512, 1024, 2048), + "block": Bottle2neck, + "layers": [3, 4, 6, 3], + "base_width": 26, + "block_args": {"scale": 4}, + }, + }, + "timm-res2net101_26w_4s": { + "encoder": Res2NetEncoder, + "pretrained_settings": pretrained_settings["timm-res2net101_26w_4s"], + "params": { + "out_channels": (3, 64, 256, 512, 1024, 2048), + "block": Bottle2neck, + "layers": [3, 4, 23, 3], + "base_width": 26, + "block_args": {"scale": 4}, + }, + }, + "timm-res2net50_26w_6s": { + "encoder": Res2NetEncoder, + "pretrained_settings": pretrained_settings["timm-res2net50_26w_6s"], + "params": { + "out_channels": (3, 64, 256, 512, 1024, 2048), + "block": Bottle2neck, + "layers": [3, 4, 6, 3], + "base_width": 26, + "block_args": {"scale": 6}, + }, + }, + "timm-res2net50_26w_8s": { + "encoder": Res2NetEncoder, + "pretrained_settings": pretrained_settings["timm-res2net50_26w_8s"], + "params": { + "out_channels": (3, 64, 256, 512, 1024, 2048), + "block": Bottle2neck, + "layers": [3, 4, 6, 3], + "base_width": 26, + "block_args": {"scale": 8}, + }, + }, + "timm-res2net50_48w_2s": { + "encoder": Res2NetEncoder, + "pretrained_settings": pretrained_settings["timm-res2net50_48w_2s"], + "params": { + "out_channels": (3, 64, 256, 512, 1024, 2048), + "block": Bottle2neck, + "layers": [3, 4, 6, 3], + "base_width": 48, + "block_args": {"scale": 2}, + }, + }, + "timm-res2net50_14w_8s": { + "encoder": Res2NetEncoder, + "pretrained_settings": pretrained_settings["timm-res2net50_14w_8s"], + "params": { + "out_channels": (3, 64, 256, 512, 1024, 2048), + "block": Bottle2neck, + "layers": [3, 4, 6, 3], + "base_width": 14, + "block_args": {"scale": 8}, + }, + }, + "timm-res2next50": { + "encoder": Res2NetEncoder, + "pretrained_settings": pretrained_settings["timm-res2next50"], + "params": { + "out_channels": (3, 64, 256, 512, 1024, 2048), + "block": Bottle2neck, + "layers": [3, 4, 6, 3], + "base_width": 4, + "cardinality": 8, + "block_args": {"scale": 4}, + }, + }, +} diff --git a/feature_extractor_models/encoders/timm_resnest.py b/feature_extractor_models/encoders/timm_resnest.py new file mode 100644 index 0000000000000000000000000000000000000000..1599b6c8ace7a506ac12603c3439d04f5ca3185b --- /dev/null +++ b/feature_extractor_models/encoders/timm_resnest.py @@ -0,0 +1,208 @@ +from ._base import EncoderMixin +from timm.models.resnet import ResNet +from timm.models.resnest import ResNestBottleneck +import torch.nn as nn + + +class ResNestEncoder(ResNet, EncoderMixin): + def __init__(self, out_channels, depth=5, **kwargs): + super().__init__(**kwargs) + self._depth = depth + self._out_channels = out_channels + self._in_channels = 3 + + del self.fc + del self.global_pool + + def get_stages(self): + return [ + nn.Identity(), + nn.Sequential(self.conv1, self.bn1, self.act1), + nn.Sequential(self.maxpool, self.layer1), + self.layer2, + 
self.layer3, + self.layer4, + ] + + def make_dilated(self, *args, **kwargs): + raise ValueError("ResNest encoders do not support dilated mode") + + def forward(self, x): + stages = self.get_stages() + + features = [] + for i in range(self._depth + 1): + x = stages[i](x) + features.append(x) + + return features + + def load_state_dict(self, state_dict, **kwargs): + state_dict.pop("fc.bias", None) + state_dict.pop("fc.weight", None) + super().load_state_dict(state_dict, **kwargs) + + +resnest_weights = { + "timm-resnest14d": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/gluon_resnest14-9c8fe254.pth" # noqa + }, + "timm-resnest26d": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/gluon_resnest26-50eb607c.pth" # noqa + }, + "timm-resnest50d": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-resnest/resnest50-528c19ca.pth" # noqa + }, + "timm-resnest101e": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-resnest/resnest101-22405ba7.pth" # noqa + }, + "timm-resnest200e": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-resnest/resnest200-75117900.pth" # noqa + }, + "timm-resnest269e": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-resnest/resnest269-0cc87c48.pth" # noqa + }, + "timm-resnest50d_4s2x40d": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-resnest/resnest50_fast_4s2x40d-41d14ed0.pth" # noqa + }, + "timm-resnest50d_1s4x24d": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-resnest/resnest50_fast_1s4x24d-d4a4f76f.pth" # noqa + }, +} + +pretrained_settings = {} +for model_name, sources in resnest_weights.items(): + pretrained_settings[model_name] = {} + for source_name, source_url in sources.items(): + pretrained_settings[model_name][source_name] = { + "url": source_url, + "input_size": [3, 224, 224], + "input_range": [0, 1], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "num_classes": 1000, + } + + +timm_resnest_encoders = { + "timm-resnest14d": { + "encoder": ResNestEncoder, + "pretrained_settings": pretrained_settings["timm-resnest14d"], + "params": { + "out_channels": (3, 64, 256, 512, 1024, 2048), + "block": ResNestBottleneck, + "layers": [1, 1, 1, 1], + "stem_type": "deep", + "stem_width": 32, + "avg_down": True, + "base_width": 64, + "cardinality": 1, + "block_args": {"radix": 2, "avd": True, "avd_first": False}, + }, + }, + "timm-resnest26d": { + "encoder": ResNestEncoder, + "pretrained_settings": pretrained_settings["timm-resnest26d"], + "params": { + "out_channels": (3, 64, 256, 512, 1024, 2048), + "block": ResNestBottleneck, + "layers": [2, 2, 2, 2], + "stem_type": "deep", + "stem_width": 32, + "avg_down": True, + "base_width": 64, + "cardinality": 1, + "block_args": {"radix": 2, "avd": True, "avd_first": False}, + }, + }, + "timm-resnest50d": { + "encoder": ResNestEncoder, + "pretrained_settings": pretrained_settings["timm-resnest50d"], + "params": { + "out_channels": (3, 64, 256, 512, 1024, 2048), + "block": ResNestBottleneck, + "layers": [3, 4, 6, 3], + "stem_type": "deep", + "stem_width": 32, + "avg_down": True, + "base_width": 64, + "cardinality": 1, + "block_args": {"radix": 2, "avd": True, "avd_first": False}, + }, + }, + "timm-resnest101e": { + "encoder": ResNestEncoder, + "pretrained_settings": 
pretrained_settings["timm-resnest101e"], + "params": { + "out_channels": (3, 128, 256, 512, 1024, 2048), + "block": ResNestBottleneck, + "layers": [3, 4, 23, 3], + "stem_type": "deep", + "stem_width": 64, + "avg_down": True, + "base_width": 64, + "cardinality": 1, + "block_args": {"radix": 2, "avd": True, "avd_first": False}, + }, + }, + "timm-resnest200e": { + "encoder": ResNestEncoder, + "pretrained_settings": pretrained_settings["timm-resnest200e"], + "params": { + "out_channels": (3, 128, 256, 512, 1024, 2048), + "block": ResNestBottleneck, + "layers": [3, 24, 36, 3], + "stem_type": "deep", + "stem_width": 64, + "avg_down": True, + "base_width": 64, + "cardinality": 1, + "block_args": {"radix": 2, "avd": True, "avd_first": False}, + }, + }, + "timm-resnest269e": { + "encoder": ResNestEncoder, + "pretrained_settings": pretrained_settings["timm-resnest269e"], + "params": { + "out_channels": (3, 128, 256, 512, 1024, 2048), + "block": ResNestBottleneck, + "layers": [3, 30, 48, 8], + "stem_type": "deep", + "stem_width": 64, + "avg_down": True, + "base_width": 64, + "cardinality": 1, + "block_args": {"radix": 2, "avd": True, "avd_first": False}, + }, + }, + "timm-resnest50d_4s2x40d": { + "encoder": ResNestEncoder, + "pretrained_settings": pretrained_settings["timm-resnest50d_4s2x40d"], + "params": { + "out_channels": (3, 64, 256, 512, 1024, 2048), + "block": ResNestBottleneck, + "layers": [3, 4, 6, 3], + "stem_type": "deep", + "stem_width": 32, + "avg_down": True, + "base_width": 40, + "cardinality": 2, + "block_args": {"radix": 4, "avd": True, "avd_first": True}, + }, + }, + "timm-resnest50d_1s4x24d": { + "encoder": ResNestEncoder, + "pretrained_settings": pretrained_settings["timm-resnest50d_1s4x24d"], + "params": { + "out_channels": (3, 64, 256, 512, 1024, 2048), + "block": ResNestBottleneck, + "layers": [3, 4, 6, 3], + "stem_type": "deep", + "stem_width": 32, + "avg_down": True, + "base_width": 24, + "cardinality": 4, + "block_args": {"radix": 1, "avd": True, "avd_first": True}, + }, + }, +} diff --git a/feature_extractor_models/encoders/timm_sknet.py b/feature_extractor_models/encoders/timm_sknet.py new file mode 100644 index 0000000000000000000000000000000000000000..14d6d2b083df50957e4293ec856cfb3ee2b5a4c0 --- /dev/null +++ b/feature_extractor_models/encoders/timm_sknet.py @@ -0,0 +1,103 @@ +from ._base import EncoderMixin +from timm.models.resnet import ResNet +from timm.models.sknet import SelectiveKernelBottleneck, SelectiveKernelBasic +import torch.nn as nn + + +class SkNetEncoder(ResNet, EncoderMixin): + def __init__(self, out_channels, depth=5, **kwargs): + super().__init__(**kwargs) + self._depth = depth + self._out_channels = out_channels + self._in_channels = 3 + + del self.fc + del self.global_pool + + def get_stages(self): + return [ + nn.Identity(), + nn.Sequential(self.conv1, self.bn1, self.act1), + nn.Sequential(self.maxpool, self.layer1), + self.layer2, + self.layer3, + self.layer4, + ] + + def forward(self, x): + stages = self.get_stages() + + features = [] + for i in range(self._depth + 1): + x = stages[i](x) + features.append(x) + + return features + + def load_state_dict(self, state_dict, **kwargs): + state_dict.pop("fc.bias", None) + state_dict.pop("fc.weight", None) + super().load_state_dict(state_dict, **kwargs) + + +sknet_weights = { + "timm-skresnet18": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/skresnet18_ra-4eec2804.pth" # noqa + }, + "timm-skresnet34": { + "imagenet": 
"https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/skresnet34_ra-bdc0ccde.pth" # noqa + }, + "timm-skresnext50_32x4d": { + "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/skresnext50_ra-f40e40bf.pth" # noqa + }, +} + +pretrained_settings = {} +for model_name, sources in sknet_weights.items(): + pretrained_settings[model_name] = {} + for source_name, source_url in sources.items(): + pretrained_settings[model_name][source_name] = { + "url": source_url, + "input_size": [3, 224, 224], + "input_range": [0, 1], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + "num_classes": 1000, + } + +timm_sknet_encoders = { + "timm-skresnet18": { + "encoder": SkNetEncoder, + "pretrained_settings": pretrained_settings["timm-skresnet18"], + "params": { + "out_channels": (3, 64, 64, 128, 256, 512), + "block": SelectiveKernelBasic, + "layers": [2, 2, 2, 2], + "zero_init_last": False, + "block_args": {"sk_kwargs": {"rd_ratio": 1 / 8, "split_input": True}}, + }, + }, + "timm-skresnet34": { + "encoder": SkNetEncoder, + "pretrained_settings": pretrained_settings["timm-skresnet34"], + "params": { + "out_channels": (3, 64, 64, 128, 256, 512), + "block": SelectiveKernelBasic, + "layers": [3, 4, 6, 3], + "zero_init_last": False, + "block_args": {"sk_kwargs": {"rd_ratio": 1 / 8, "split_input": True}}, + }, + }, + "timm-skresnext50_32x4d": { + "encoder": SkNetEncoder, + "pretrained_settings": pretrained_settings["timm-skresnext50_32x4d"], + "params": { + "out_channels": (3, 64, 256, 512, 1024, 2048), + "block": SelectiveKernelBottleneck, + "layers": [3, 4, 6, 3], + "zero_init_last": False, + "cardinality": 32, + "base_width": 4, + }, + }, +} diff --git a/feature_extractor_models/encoders/timm_universal.py b/feature_extractor_models/encoders/timm_universal.py new file mode 100644 index 0000000000000000000000000000000000000000..9702a7c34d16e9402b142682ea828b020deb961d --- /dev/null +++ b/feature_extractor_models/encoders/timm_universal.py @@ -0,0 +1,38 @@ +import timm +import torch.nn as nn + + +class TimmUniversalEncoder(nn.Module): + def __init__(self, name, pretrained=True, in_channels=3, depth=5, output_stride=32): + super().__init__() + kwargs = dict( + in_chans=in_channels, + features_only=True, + output_stride=output_stride, + pretrained=pretrained, + out_indices=tuple(range(depth)), + ) + + # not all models support output stride argument, drop it by default + if output_stride == 32: + kwargs.pop("output_stride") + + self.model = timm.create_model(name, **kwargs) + + self._in_channels = in_channels + self._out_channels = [in_channels] + self.model.feature_info.channels() + self._depth = depth + self._output_stride = output_stride + + def forward(self, x): + features = self.model(x) + features = [x] + features + return features + + @property + def out_channels(self): + return self._out_channels + + @property + def output_stride(self): + return min(self._output_stride, 2**self._depth) diff --git a/feature_extractor_models/encoders/vgg.py b/feature_extractor_models/encoders/vgg.py new file mode 100644 index 0000000000000000000000000000000000000000..cbc602c8e4ebbbed362893042e54843a692aabb3 --- /dev/null +++ b/feature_extractor_models/encoders/vgg.py @@ -0,0 +1,159 @@ +"""Each encoder should have following attributes and methods and be inherited from `_base.EncoderMixin` + +Attributes: + + _out_channels (list of int): specify number of channels for each encoder feature tensor + _depth (int): specify number of stages in decoder 
(in other words number of downsampling operations) + _in_channels (int): default number of input channels in first Conv2d layer for encoder (usually 3) + +Methods: + + forward(self, x: torch.Tensor) + produce list of features of different spatial resolutions, each feature is a 4D torch.tensor of + shape NCHW (features should be sorted in descending order according to spatial resolution, starting + with resolution same as input `x` tensor). + + Input: `x` with shape (1, 3, 64, 64) + Output: [f0, f1, f2, f3, f4, f5] - features with corresponding shapes + [(1, 3, 64, 64), (1, 64, 32, 32), (1, 128, 16, 16), (1, 256, 8, 8), + (1, 512, 4, 4), (1, 1024, 2, 2)] (C - dim may differ) + + also should support number of features according to specified depth, e.g. if depth = 5, + number of feature tensors = 6 (one with same resolution as input and 5 downsampled), + depth = 3 -> number of feature tensors = 4 (one with same resolution as input and 3 downsampled). +""" + +import torch.nn as nn +from torchvision.models.vgg import VGG +from torchvision.models.vgg import make_layers +from pretrainedmodels.models.torchvision_models import pretrained_settings + +from ._base import EncoderMixin + +# fmt: off +cfg = { + 'A': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], + 'B': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], + 'D': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'], + 'E': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'], +} +# fmt: on + + +class VGGEncoder(VGG, EncoderMixin): + def __init__(self, out_channels, config, batch_norm=False, depth=5, **kwargs): + super().__init__(make_layers(config, batch_norm=batch_norm), **kwargs) + self._out_channels = out_channels + self._depth = depth + self._in_channels = 3 + del self.classifier + + def make_dilated(self, *args, **kwargs): + raise ValueError( + "'VGG' models do not support dilated mode due to Max Pooling" + " operations for downsampling!" 
+ ) + + def get_stages(self): + stages = [] + stage_modules = [] + for module in self.features: + if isinstance(module, nn.MaxPool2d): + stages.append(nn.Sequential(*stage_modules)) + stage_modules = [] + stage_modules.append(module) + stages.append(nn.Sequential(*stage_modules)) + return stages + + def forward(self, x): + stages = self.get_stages() + + features = [] + for i in range(self._depth + 1): + x = stages[i](x) + features.append(x) + + return features + + def load_state_dict(self, state_dict, **kwargs): + keys = list(state_dict.keys()) + for k in keys: + if k.startswith("classifier"): + state_dict.pop(k, None) + super().load_state_dict(state_dict, **kwargs) + + +vgg_encoders = { + "vgg11": { + "encoder": VGGEncoder, + "pretrained_settings": pretrained_settings["vgg11"], + "params": { + "out_channels": (64, 128, 256, 512, 512, 512), + "config": cfg["A"], + "batch_norm": False, + }, + }, + "vgg11_bn": { + "encoder": VGGEncoder, + "pretrained_settings": pretrained_settings["vgg11_bn"], + "params": { + "out_channels": (64, 128, 256, 512, 512, 512), + "config": cfg["A"], + "batch_norm": True, + }, + }, + "vgg13": { + "encoder": VGGEncoder, + "pretrained_settings": pretrained_settings["vgg13"], + "params": { + "out_channels": (64, 128, 256, 512, 512, 512), + "config": cfg["B"], + "batch_norm": False, + }, + }, + "vgg13_bn": { + "encoder": VGGEncoder, + "pretrained_settings": pretrained_settings["vgg13_bn"], + "params": { + "out_channels": (64, 128, 256, 512, 512, 512), + "config": cfg["B"], + "batch_norm": True, + }, + }, + "vgg16": { + "encoder": VGGEncoder, + "pretrained_settings": pretrained_settings["vgg16"], + "params": { + "out_channels": (64, 128, 256, 512, 512, 512), + "config": cfg["D"], + "batch_norm": False, + }, + }, + "vgg16_bn": { + "encoder": VGGEncoder, + "pretrained_settings": pretrained_settings["vgg16_bn"], + "params": { + "out_channels": (64, 128, 256, 512, 512, 512), + "config": cfg["D"], + "batch_norm": True, + }, + }, + "vgg19": { + "encoder": VGGEncoder, + "pretrained_settings": pretrained_settings["vgg19"], + "params": { + "out_channels": (64, 128, 256, 512, 512, 512), + "config": cfg["E"], + "batch_norm": False, + }, + }, + "vgg19_bn": { + "encoder": VGGEncoder, + "pretrained_settings": pretrained_settings["vgg19_bn"], + "params": { + "out_channels": (64, 128, 256, 512, 512, 512), + "config": cfg["E"], + "batch_norm": True, + }, + }, +} diff --git a/feature_extractor_models/encoders/xception.py b/feature_extractor_models/encoders/xception.py new file mode 100644 index 0000000000000000000000000000000000000000..c8c476ced33c36aa271fd0c362c635b240ff6c32 --- /dev/null +++ b/feature_extractor_models/encoders/xception.py @@ -0,0 +1,77 @@ +import torch.nn as nn + +from pretrainedmodels.models.xception import pretrained_settings +from pretrainedmodels.models.xception import Xception + +from ._base import EncoderMixin + + +class XceptionEncoder(Xception, EncoderMixin): + def __init__(self, out_channels, *args, depth=5, **kwargs): + super().__init__(*args, **kwargs) + + self._out_channels = out_channels + self._depth = depth + self._in_channels = 3 + + # modify padding to maintain output shape + self.conv1.padding = (1, 1) + self.conv2.padding = (1, 1) + + del self.fc + + def make_dilated(self, *args, **kwargs): + raise ValueError( + "Xception encoder does not support dilated mode " + "due to pooling operation for downsampling!" 
+ ) + + def get_stages(self): + return [ + nn.Identity(), + nn.Sequential( + self.conv1, self.bn1, self.relu, self.conv2, self.bn2, self.relu + ), + self.block1, + self.block2, + nn.Sequential( + self.block3, + self.block4, + self.block5, + self.block6, + self.block7, + self.block8, + self.block9, + self.block10, + self.block11, + ), + nn.Sequential( + self.block12, self.conv3, self.bn3, self.relu, self.conv4, self.bn4 + ), + ] + + def forward(self, x): + stages = self.get_stages() + + features = [] + for i in range(self._depth + 1): + x = stages[i](x) + features.append(x) + + return features + + def load_state_dict(self, state_dict): + # remove linear + state_dict.pop("fc.bias", None) + state_dict.pop("fc.weight", None) + + super().load_state_dict(state_dict) + + +xception_encoders = { + "xception": { + "encoder": XceptionEncoder, + "pretrained_settings": pretrained_settings["xception"], + "params": {"out_channels": (3, 64, 128, 256, 728, 2048)}, + } +} diff --git a/feature_extractor_models/utils/__init__.py b/feature_extractor_models/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4aeb8fa5d076e55c07f9dcabd4650e59f96b2db2 --- /dev/null +++ b/feature_extractor_models/utils/__init__.py @@ -0,0 +1,12 @@ +import warnings + +from . import train +from . import losses +from . import metrics + +__all__ = ["train", "losses", "metrics"] + +warnings.warn( + "`smp.utils` module is deprecated and will be removed in future releases.", + DeprecationWarning, +) diff --git a/feature_extractor_models/utils/base.py b/feature_extractor_models/utils/base.py new file mode 100644 index 0000000000000000000000000000000000000000..60def85de5ee5bf1575455fa5db75e0178e6369a --- /dev/null +++ b/feature_extractor_models/utils/base.py @@ -0,0 +1,67 @@ +import re +import torch.nn as nn + + +class BaseObject(nn.Module): + def __init__(self, name=None): + super().__init__() + self._name = name + + @property + def __name__(self): + if self._name is None: + name = self.__class__.__name__ + s1 = re.sub("(.)([A-Z][a-z]+)", r"\1_\2", name) + return re.sub("([a-z0-9])([A-Z])", r"\1_\2", s1).lower() + else: + return self._name + + +class Metric(BaseObject): + pass + + +class Loss(BaseObject): + def __add__(self, other): + if isinstance(other, Loss): + return SumOfLosses(self, other) + else: + raise ValueError("Loss should be inherited from `Loss` class") + + def __radd__(self, other): + return self.__add__(other) + + def __mul__(self, value): + if isinstance(value, (int, float)): + return MultipliedLoss(self, value) + else: + raise ValueError("Loss should be inherited from `BaseLoss` class") + + def __rmul__(self, other): + return self.__mul__(other) + + +class SumOfLosses(Loss): + def __init__(self, l1, l2): + name = "{} + {}".format(l1.__name__, l2.__name__) + super().__init__(name=name) + self.l1 = l1 + self.l2 = l2 + + def __call__(self, *inputs): + return self.l1.forward(*inputs) + self.l2.forward(*inputs) + + +class MultipliedLoss(Loss): + def __init__(self, loss, multiplier): + # resolve name + if len(loss.__name__.split("+")) > 1: + name = "{} * ({})".format(multiplier, loss.__name__) + else: + name = "{} * {}".format(multiplier, loss.__name__) + super().__init__(name=name) + self.loss = loss + self.multiplier = multiplier + + def __call__(self, *inputs): + return self.multiplier * self.loss.forward(*inputs) diff --git a/feature_extractor_models/utils/functional.py b/feature_extractor_models/utils/functional.py new file mode 100644 index 
0000000000000000000000000000000000000000..261a1ee8fdd4ad55bc0de42e520b0a28a66b624e --- /dev/null +++ b/feature_extractor_models/utils/functional.py @@ -0,0 +1,132 @@ +import torch + + +def _take_channels(*xs, ignore_channels=None): + if ignore_channels is None: + return xs + else: + channels = [ + channel + for channel in range(xs[0].shape[1]) + if channel not in ignore_channels + ] + xs = [ + torch.index_select(x, dim=1, index=torch.tensor(channels).to(x.device)) + for x in xs + ] + return xs + + +def _threshold(x, threshold=None): + if threshold is not None: + return (x > threshold).type(x.dtype) + else: + return x + + +def iou(pr, gt, eps=1e-7, threshold=None, ignore_channels=None): + """Calculate Intersection over Union between ground truth and prediction + Args: + pr (torch.Tensor): predicted tensor + gt (torch.Tensor): ground truth tensor + eps (float): epsilon to avoid zero division + threshold: threshold for outputs binarization + Returns: + float: IoU (Jaccard) score + """ + + pr = _threshold(pr, threshold=threshold) + pr, gt = _take_channels(pr, gt, ignore_channels=ignore_channels) + + intersection = torch.sum(gt * pr) + union = torch.sum(gt) + torch.sum(pr) - intersection + eps + return (intersection + eps) / union + + +jaccard = iou + + +def f_score(pr, gt, beta=1, eps=1e-7, threshold=None, ignore_channels=None): + """Calculate F-score between ground truth and prediction + Args: + pr (torch.Tensor): predicted tensor + gt (torch.Tensor): ground truth tensor + beta (float): positive constant + eps (float): epsilon to avoid zero division + threshold: threshold for outputs binarization + Returns: + float: F score + """ + + pr = _threshold(pr, threshold=threshold) + pr, gt = _take_channels(pr, gt, ignore_channels=ignore_channels) + + tp = torch.sum(gt * pr) + fp = torch.sum(pr) - tp + fn = torch.sum(gt) - tp + + score = ((1 + beta**2) * tp + eps) / ((1 + beta**2) * tp + beta**2 * fn + fp + eps) + + return score + + +def accuracy(pr, gt, threshold=0.5, ignore_channels=None): + """Calculate accuracy score between ground truth and prediction + Args: + pr (torch.Tensor): predicted tensor + gt (torch.Tensor): ground truth tensor + eps (float): epsilon to avoid zero division + threshold: threshold for outputs binarization + Returns: + float: precision score + """ + pr = _threshold(pr, threshold=threshold) + pr, gt = _take_channels(pr, gt, ignore_channels=ignore_channels) + + tp = torch.sum(gt == pr, dtype=pr.dtype) + score = tp / gt.view(-1).shape[0] + return score + + +def precision(pr, gt, eps=1e-7, threshold=None, ignore_channels=None): + """Calculate precision score between ground truth and prediction + Args: + pr (torch.Tensor): predicted tensor + gt (torch.Tensor): ground truth tensor + eps (float): epsilon to avoid zero division + threshold: threshold for outputs binarization + Returns: + float: precision score + """ + + pr = _threshold(pr, threshold=threshold) + pr, gt = _take_channels(pr, gt, ignore_channels=ignore_channels) + + tp = torch.sum(gt * pr) + fp = torch.sum(pr) - tp + + score = (tp + eps) / (tp + fp + eps) + + return score + + +def recall(pr, gt, eps=1e-7, threshold=None, ignore_channels=None): + """Calculate Recall between ground truth and prediction + Args: + pr (torch.Tensor): A list of predicted elements + gt (torch.Tensor): A list of elements that are to be predicted + eps (float): epsilon to avoid zero division + threshold: threshold for outputs binarization + Returns: + float: recall score + """ + + pr = _threshold(pr, threshold=threshold) + pr, gt = 
_take_channels(pr, gt, ignore_channels=ignore_channels) + + tp = torch.sum(gt * pr) + fn = torch.sum(gt) - tp + + score = (tp + eps) / (tp + fn + eps) + + return score diff --git a/feature_extractor_models/utils/losses.py b/feature_extractor_models/utils/losses.py new file mode 100644 index 0000000000000000000000000000000000000000..d7a87c8eb972696db63f08b903a1b84168e1aea6 --- /dev/null +++ b/feature_extractor_models/utils/losses.py @@ -0,0 +1,69 @@ +import torch.nn as nn + +from . import base +from . import functional as F +from ..base.modules import Activation + + +class JaccardLoss(base.Loss): + def __init__(self, eps=1.0, activation=None, ignore_channels=None, **kwargs): + super().__init__(**kwargs) + self.eps = eps + self.activation = Activation(activation) + self.ignore_channels = ignore_channels + + def forward(self, y_pr, y_gt): + y_pr = self.activation(y_pr) + return 1 - F.jaccard( + y_pr, + y_gt, + eps=self.eps, + threshold=None, + ignore_channels=self.ignore_channels, + ) + + +class DiceLoss(base.Loss): + def __init__( + self, eps=1.0, beta=1.0, activation=None, ignore_channels=None, **kwargs + ): + super().__init__(**kwargs) + self.eps = eps + self.beta = beta + self.activation = Activation(activation) + self.ignore_channels = ignore_channels + + def forward(self, y_pr, y_gt): + y_pr = self.activation(y_pr) + return 1 - F.f_score( + y_pr, + y_gt, + beta=self.beta, + eps=self.eps, + threshold=None, + ignore_channels=self.ignore_channels, + ) + + +class L1Loss(nn.L1Loss, base.Loss): + pass + + +class MSELoss(nn.MSELoss, base.Loss): + pass + + +class CrossEntropyLoss(nn.CrossEntropyLoss, base.Loss): + pass + + +class NLLLoss(nn.NLLLoss, base.Loss): + pass + + +class BCELoss(nn.BCELoss, base.Loss): + pass + + +class BCEWithLogitsLoss(nn.BCEWithLogitsLoss, base.Loss): + pass diff --git a/feature_extractor_models/utils/meter.py b/feature_extractor_models/utils/meter.py new file mode 100644 index 0000000000000000000000000000000000000000..9faa720cef0368474b8238d6a530cb2bfd5164f3 --- /dev/null +++ b/feature_extractor_models/utils/meter.py @@ -0,0 +1,61 @@ +import numpy as np + + +class Meter(object): + """Meters provide a way to keep track of important statistics in an online manner. + This class is abstract, but provides a standard interface for all meters to follow. + """ + + def reset(self): + """Reset the meter to default settings.""" + pass + + def add(self, value): + """Log a new value to the meter + Args: + value: Next result to include. 
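The metric helpers in `functional.py` all reduce to sums of true positives, false positives and false negatives over (optionally thresholded) prediction and ground-truth tensors. A minimal, self-contained sketch of the same reductions on a made-up 4x4 mask (only `torch` is assumed; the values and the 0.5 threshold are purely illustrative):

```python
import torch

# Made-up 1x1x4x4 prediction scores and binary ground truth (top-left 2x2 is foreground).
pr = torch.tensor([[[[0.9, 0.8, 0.2, 0.1],
                     [0.7, 0.6, 0.3, 0.2],
                     [0.1, 0.2, 0.6, 0.3],
                     [0.0, 0.1, 0.2, 0.1]]]])
gt = torch.zeros_like(pr)
gt[..., :2, :2] = 1.0

# Same binarization step as `_threshold(pr, threshold=0.5)`.
pr_bin = (pr > 0.5).type(pr.dtype)

eps = 1e-7
tp = torch.sum(gt * pr_bin)      # true positives  -> 4
fp = torch.sum(pr_bin) - tp      # false positives -> 1
fn = torch.sum(gt) - tp          # false negatives -> 0

beta = 1.0
iou = (tp + eps) / (tp + fp + fn + eps)                                           # 0.8
f1 = ((1 + beta**2) * tp + eps) / ((1 + beta**2) * tp + beta**2 * fn + fp + eps)  # ~0.889
precision = (tp + eps) / (tp + fp + eps)                                          # 0.8
recall = (tp + eps) / (tp + fn + eps)                                             # 1.0
print(iou.item(), f1.item(), precision.item(), recall.item())
```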
+ """ + pass + + def value(self): + """Get the value of the meter in the current state.""" + pass + + +class AverageValueMeter(Meter): + def __init__(self): + super(AverageValueMeter, self).__init__() + self.reset() + self.val = 0 + + def add(self, value, n=1): + self.val = value + self.sum += value + self.var += value * value + self.n += n + + if self.n == 0: + self.mean, self.std = np.nan, np.nan + elif self.n == 1: + self.mean = 0.0 + self.sum # This is to force a copy in torch/numpy + self.std = np.inf + self.mean_old = self.mean + self.m_s = 0.0 + else: + self.mean = self.mean_old + (value - n * self.mean_old) / float(self.n) + self.m_s += (value - self.mean_old) * (value - self.mean) + self.mean_old = self.mean + self.std = np.sqrt(self.m_s / (self.n - 1.0)) + + def value(self): + return self.mean, self.std + + def reset(self): + self.n = 0 + self.sum = 0.0 + self.var = 0.0 + self.val = 0.0 + self.mean = np.nan + self.mean_old = 0.0 + self.m_s = 0.0 + self.std = np.nan diff --git a/feature_extractor_models/utils/metrics.py b/feature_extractor_models/utils/metrics.py new file mode 100644 index 0000000000000000000000000000000000000000..65df21708cec34d076d9df6bb6bd4755fd3bbccb --- /dev/null +++ b/feature_extractor_models/utils/metrics.py @@ -0,0 +1,111 @@ +from . import base +from . import functional as F +from ..base.modules import Activation + + +class IoU(base.Metric): + __name__ = "iou_score" + + def __init__( + self, eps=1e-7, threshold=0.5, activation=None, ignore_channels=None, **kwargs + ): + super().__init__(**kwargs) + self.eps = eps + self.threshold = threshold + self.activation = Activation(activation) + self.ignore_channels = ignore_channels + + def forward(self, y_pr, y_gt): + y_pr = self.activation(y_pr) + return F.iou( + y_pr, + y_gt, + eps=self.eps, + threshold=self.threshold, + ignore_channels=self.ignore_channels, + ) + + +class Fscore(base.Metric): + def __init__( + self, + beta=1, + eps=1e-7, + threshold=0.5, + activation=None, + ignore_channels=None, + **kwargs, + ): + super().__init__(**kwargs) + self.eps = eps + self.beta = beta + self.threshold = threshold + self.activation = Activation(activation) + self.ignore_channels = ignore_channels + + def forward(self, y_pr, y_gt): + y_pr = self.activation(y_pr) + return F.f_score( + y_pr, + y_gt, + eps=self.eps, + beta=self.beta, + threshold=self.threshold, + ignore_channels=self.ignore_channels, + ) + + +class Accuracy(base.Metric): + def __init__(self, threshold=0.5, activation=None, ignore_channels=None, **kwargs): + super().__init__(**kwargs) + self.threshold = threshold + self.activation = Activation(activation) + self.ignore_channels = ignore_channels + + def forward(self, y_pr, y_gt): + y_pr = self.activation(y_pr) + return F.accuracy( + y_pr, y_gt, threshold=self.threshold, ignore_channels=self.ignore_channels + ) + + +class Recall(base.Metric): + def __init__( + self, eps=1e-7, threshold=0.5, activation=None, ignore_channels=None, **kwargs + ): + super().__init__(**kwargs) + self.eps = eps + self.threshold = threshold + self.activation = Activation(activation) + self.ignore_channels = ignore_channels + + def forward(self, y_pr, y_gt): + y_pr = self.activation(y_pr) + return F.recall( + y_pr, + y_gt, + eps=self.eps, + threshold=self.threshold, + ignore_channels=self.ignore_channels, + ) + + +class Precision(base.Metric): + def __init__( + self, eps=1e-7, threshold=0.5, activation=None, ignore_channels=None, **kwargs + ): + super().__init__(**kwargs) + self.eps = eps + self.threshold = threshold + self.activation 
= Activation(activation) + self.ignore_channels = ignore_channels + + def forward(self, y_pr, y_gt): + y_pr = self.activation(y_pr) + return F.precision( + y_pr, + y_gt, + eps=self.eps, + threshold=self.threshold, + ignore_channels=self.ignore_channels, + ) diff --git a/feature_extractor_models/utils/train.py b/feature_extractor_models/utils/train.py new file mode 100644 index 0000000000000000000000000000000000000000..8c087c6b0691ab665021e613f700ad209e86b076 --- /dev/null +++ b/feature_extractor_models/utils/train.py @@ -0,0 +1,116 @@ +import sys +import torch +from tqdm import tqdm as tqdm +from .meter import AverageValueMeter + + +class Epoch: + def __init__(self, model, loss, metrics, stage_name, device="cpu", verbose=True): + self.model = model + self.loss = loss + self.metrics = metrics + self.stage_name = stage_name + self.verbose = verbose + self.device = device + + self._to_device() + + def _to_device(self): + self.model.to(self.device) + self.loss.to(self.device) + for metric in self.metrics: + metric.to(self.device) + + def _format_logs(self, logs): + str_logs = ["{} - {:.4}".format(k, v) for k, v in logs.items()] + s = ", ".join(str_logs) + return s + + def batch_update(self, x, y): + raise NotImplementedError + + def on_epoch_start(self): + pass + + def run(self, dataloader): + self.on_epoch_start() + + logs = {} + loss_meter = AverageValueMeter() + metrics_meters = { + metric.__name__: AverageValueMeter() for metric in self.metrics + } + + with tqdm( + dataloader, + desc=self.stage_name, + file=sys.stdout, + disable=not (self.verbose), + ) as iterator: + for x, y in iterator: + x, y = x.to(self.device), y.to(self.device) + loss, y_pred = self.batch_update(x, y) + + # update loss logs + loss_value = loss.cpu().detach().numpy() + loss_meter.add(loss_value) + loss_logs = {self.loss.__name__: loss_meter.mean} + logs.update(loss_logs) + + # update metrics logs + for metric_fn in self.metrics: + metric_value = metric_fn(y_pred, y).cpu().detach().numpy() + metrics_meters[metric_fn.__name__].add(metric_value) + metrics_logs = {k: v.mean for k, v in metrics_meters.items()} + logs.update(metrics_logs) + + if self.verbose: + s = self._format_logs(logs) + iterator.set_postfix_str(s) + + return logs + + +class TrainEpoch(Epoch): + def __init__(self, model, loss, metrics, optimizer, device="cpu", verbose=True): + super().__init__( + model=model, + loss=loss, + metrics=metrics, + stage_name="train", + device=device, + verbose=verbose, + ) + self.optimizer = optimizer + + def on_epoch_start(self): + self.model.train() + + def batch_update(self, x, y): + self.optimizer.zero_grad() + prediction = self.model.forward(x) + loss = self.loss(prediction, y) + loss.backward() + self.optimizer.step() + return loss, prediction + + +class ValidEpoch(Epoch): + def __init__(self, model, loss, metrics, device="cpu", verbose=True): + super().__init__( + model=model, + loss=loss, + metrics=metrics, + stage_name="valid", + device=device, + verbose=verbose, + ) + + def on_epoch_start(self): + self.model.eval() + + def batch_update(self, x, y): + with torch.no_grad(): + prediction = self.model.forward(x) + loss = self.loss(prediction, y) + return loss, prediction diff --git a/logger.py b/logger.py new file mode 100644 index 0000000000000000000000000000000000000000..56b4d718091ad22d84d59387ca76628aa242555e --- /dev/null +++ b/logger.py @@ -0,0 +1,28 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. 
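`TrainEpoch` and `ValidEpoch` only need a model, a `Loss`, a list of `Metric`s and a dataloader yielding `(x, y)` pairs. A rough usage sketch, assuming the package keeps the usual segmentation-models layout (so `FPN`, `DiceLoss` and `IoU` are importable as below, and "resnet18" is an available encoder name) and using toy tensors in place of a real dataset:

```python
import torch
from torch.utils.data import DataLoader, TensorDataset

import feature_extractor_models as smp
from feature_extractor_models.utils import losses, metrics, train

# Toy binary-segmentation data standing in for a real dataset.
images = torch.rand(8, 3, 64, 64)
masks = (torch.rand(8, 1, 64, 64) > 0.5).float()
loader = DataLoader(TensorDataset(images, masks), batch_size=4)

# encoder_weights=None keeps the sketch download-free.
model = smp.FPN(encoder_name="resnet18", encoder_weights=None,
                classes=1, activation="sigmoid")
loss = losses.DiceLoss()
metric_fns = [metrics.IoU(threshold=0.5)]
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

train_epoch = train.TrainEpoch(model, loss=loss, metrics=metric_fns,
                               optimizer=optimizer, device="cpu", verbose=True)
valid_epoch = train.ValidEpoch(model, loss=loss, metrics=metric_fns,
                               device="cpu", verbose=True)

for epoch in range(2):
    train_logs = train_epoch.run(loader)   # e.g. {"dice_loss": ..., "iou_score": ...}
    valid_logs = valid_epoch.run(loader)
    print(epoch, train_logs, valid_logs)
```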
+ +from pathlib import Path +import logging + +import pytorch_lightning # noqa: F401 + + +formatter = logging.Formatter( + fmt="[%(asctime)s %(name)s %(levelname)s] %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", +) +handler = logging.StreamHandler() +handler.setFormatter(formatter) +handler.setLevel(logging.INFO) + +logger = logging.getLogger("maploc") +logger.setLevel(logging.INFO) +logger.addHandler(handler) +logger.propagate = False + +pl_logger = logging.getLogger("pytorch_lightning") +if len(pl_logger.handlers): + pl_logger.handlers[0].setFormatter(formatter) + +repo_dir = Path(__file__).parent +EXPERIMENTS_PATH = repo_dir / "experiments/" +DATASETS_PATH = repo_dir / "datasets/" diff --git a/models/__init__.py b/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..6de8926cef52331393188c986e483179d50472bb --- /dev/null +++ b/models/__init__.py @@ -0,0 +1,33 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. + +# Adapted from PixLoc, Paul-Edouard Sarlin, ETH Zurich +# https://github.com/cvg/pixloc +# Released under the Apache License 2.0 + +import inspect + +from .base import BaseModel + +def get_class(mod_name, base_path, BaseClass): + """Get the class object which inherits from BaseClass and is defined in + the module named mod_name, child of base_path. + """ + mod_path = "{}.{}".format(base_path, mod_name) + mod = __import__(mod_path, fromlist=[""]) + classes = inspect.getmembers(mod, inspect.isclass) + # Filter classes defined in the module + classes = [c for c in classes if c[1].__module__ == mod_path] + # Filter classes inherited from BaseModel + classes = [c for c in classes if issubclass(c[1], BaseClass)] + assert len(classes) == 1, classes + return classes[0][1] + + +def get_model(name): + if name == "localizer": + name = "localizer_basic" + elif name == "rotation_localizer": + name = "localizer_basic_rotation" + elif name == "bev_localizer": + name = "localizer_bev_plane" + return get_class(name, __name__, BaseModel) diff --git a/models/base.py b/models/base.py new file mode 100644 index 0000000000000000000000000000000000000000..a9978eabb3c32fcd98f12399347f4c864e463494 --- /dev/null +++ b/models/base.py @@ -0,0 +1,123 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. + +# Adapted from PixLoc, Paul-Edouard Sarlin, ETH Zurich +# https://github.com/cvg/pixloc +# Released under the Apache License 2.0 + +""" +Base class for trainable models. +""" + +from abc import ABCMeta, abstractmethod +from copy import copy + +import omegaconf +from omegaconf import OmegaConf +from torch import nn + + +class BaseModel(nn.Module, metaclass=ABCMeta): + """ + What the child model is expect to declare: + default_conf: dictionary of the default configuration of the model. + It recursively updates the default_conf of all parent classes, and + it is updated by the user-provided configuration passed to __init__. + Configurations can be nested. + + required_data_keys: list of expected keys in the input data dictionary. + + strict_conf (optional): boolean. If false, BaseModel does not raise + an error when the user provides an unknown configuration entry. + + _init(self, conf): initialization method, where conf is the final + configuration object (also accessible with `self.conf`). Accessing + unknown configuration entries will raise an error. + + _forward(self, data): method that returns a dictionary of batched + prediction tensors based on a dictionary of batched input data tensors. 
+ + loss(self, pred, data): method that returns a dictionary of losses, + computed from model predictions and input data. Each loss is a batch + of scalars, i.e. a torch.Tensor of shape (B,). + The total loss to be optimized has the key `'total'`. + + metrics(self, pred, data): method that returns a dictionary of metrics, + each as a batch of scalars. + """ + + base_default_conf = { + "name": None, + "trainable": True, # if false: do not optimize this model parameters + "freeze_batch_normalization": False, # use test-time statistics + } + default_conf = {} + required_data_keys = [] + strict_conf = True + + def __init__(self, conf): + """Perform some logic and call the _init method of the child model.""" + super().__init__() + default_conf = OmegaConf.merge( + self.base_default_conf, OmegaConf.create(self.default_conf) + ) + if self.strict_conf: + OmegaConf.set_struct(default_conf, True) + + # fixme: backward compatibility + if "pad" in conf and "pad" not in default_conf: # backward compat. + with omegaconf.read_write(conf): + with omegaconf.open_dict(conf): + conf["interpolation"] = {"pad": conf.pop("pad")} + + if isinstance(conf, dict): + conf = OmegaConf.create(conf) + self.conf = conf = OmegaConf.merge(default_conf, conf) + OmegaConf.set_readonly(conf, True) + OmegaConf.set_struct(conf, True) + self.required_data_keys = copy(self.required_data_keys) + self._init(conf) + + if not conf.trainable: + for p in self.parameters(): + p.requires_grad = False + + def train(self, mode=True): + super().train(mode) + + def freeze_bn(module): + if isinstance(module, nn.modules.batchnorm._BatchNorm): + module.eval() + + if self.conf.freeze_batch_normalization: + self.apply(freeze_bn) + + return self + + def forward(self, data): + """Check the data and call the _forward method of the child model.""" + + def recursive_key_check(expected, given): + for key in expected: + assert key in given, f"Missing key {key} in data" + if isinstance(expected, dict): + recursive_key_check(expected[key], given[key]) + + recursive_key_check(self.required_data_keys, data) + return self._forward(data) + + @abstractmethod + def _init(self, conf): + """To be implemented by the child class.""" + raise NotImplementedError + + @abstractmethod + def _forward(self, data): + """To be implemented by the child class.""" + raise NotImplementedError + + def loss(self, pred, data): + """To be implemented by the child class.""" + raise NotImplementedError + + def metrics(self): + return {} # no metrics diff --git a/models/bev_net.py b/models/bev_net.py new file mode 100644 index 0000000000000000000000000000000000000000..35056ca9a1d08f64262f7921fa2dcb4278d73530 --- /dev/null +++ b/models/bev_net.py @@ -0,0 +1,61 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. 
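The configuration and forward contract of `BaseModel` is easiest to see on a toy child class. A minimal sketch, assuming the repository root is on `PYTHONPATH`; `TinyRegressor` and its dimensions are made up for illustration:

```python
import torch
import torch.nn as nn

from models.base import BaseModel  # assumes the repository root is on PYTHONPATH


class TinyRegressor(BaseModel):
    # Merged over BaseModel.base_default_conf, then overridden by the user conf.
    default_conf = {"in_dim": 4, "hidden_dim": 16, "out_dim": 2}
    required_data_keys = ["x"]

    def _init(self, conf):
        self.net = nn.Sequential(
            nn.Linear(conf.in_dim, conf.hidden_dim),
            nn.ReLU(),
            nn.Linear(conf.hidden_dim, conf.out_dim),
        )

    def _forward(self, data):
        return {"y": self.net(data["x"])}

    def loss(self, pred, data):
        # One scalar per batch element; the optimized term goes under "total".
        return {"total": (pred["y"] - data["y_gt"]).pow(2).mean(-1)}


model = TinyRegressor({"hidden_dim": 32})      # user conf overrides the default
data = {"x": torch.randn(8, 4), "y_gt": torch.randn(8, 2)}
pred = model(data)                             # forward() first checks required_data_keys
print(model.conf.hidden_dim, pred["y"].shape, model.loss(pred, data)["total"].shape)
```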
+ +import torch.nn as nn +from torchvision.models.resnet import Bottleneck + +from .base import BaseModel +from .feature_extractor import AdaptationBlock +from .utils import checkpointed + + +class BEVNet(BaseModel): + default_conf = { + "pretrained": True, + "num_blocks": "???", + "latent_dim": "???", + "input_dim": "${.latent_dim}", + "output_dim": "${.latent_dim}", + "confidence": False, + "norm_layer": "nn.BatchNorm2d", # normalization ind decoder blocks + "checkpointed": False, # whether to use gradient checkpointing + "padding": "zeros", + } + + def _init(self, conf): + blocks = [] + Block = checkpointed(Bottleneck, do=conf.checkpointed) + for i in range(conf.num_blocks): + dim = conf.input_dim if i == 0 else conf.latent_dim + blocks.append( + Block( + dim, + conf.latent_dim // Bottleneck.expansion, + norm_layer=eval(conf.norm_layer), + ) + ) + self.blocks = nn.Sequential(*blocks) + self.output_layer = AdaptationBlock(conf.latent_dim, conf.output_dim) + if conf.confidence: + self.confidence_layer = AdaptationBlock(conf.latent_dim, 1) + + def update_padding(module): + if isinstance(module, nn.Conv2d): + module.padding_mode = conf.padding + + if conf.padding != "zeros": + self.bocks.apply(update_padding) + + def _forward(self, data): + features = self.blocks(data["input"]) + pred = { + "output": self.output_layer(features), + } + if self.conf.confidence: + pred["confidence"] = self.confidence_layer(features).squeeze(1).sigmoid() + return pred + + def loss(self, pred, data): + raise NotImplementedError + + def metrics(self, pred, data): + raise NotImplementedError diff --git a/models/bev_projection.py b/models/bev_projection.py new file mode 100644 index 0000000000000000000000000000000000000000..af49c115d4bf4a2aa0d71f1a111672908bbd839a --- /dev/null +++ b/models/bev_projection.py @@ -0,0 +1,91 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. 
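`BEVNet` is a stack of `num_blocks` torchvision `Bottleneck` residual blocks at `latent_dim` channels, followed by a 1x1 output head and an optional confidence head. A quick shape-check sketch with arbitrary dimensions, assuming `models.bev_net` is importable; the "???" entries of `default_conf` must be filled in by the caller:

```python
import torch
from models.bev_net import BEVNet  # assumes the repository root is on PYTHONPATH

net = BEVNet({
    "num_blocks": 2,       # required ("???" in default_conf)
    "latent_dim": 128,     # input_dim defaults to ${.latent_dim}
    "output_dim": 8,
    "confidence": True,
})

bev = torch.randn(1, 128, 64, 32)      # (B, latent_dim, H, W) BEV feature plane
pred = net({"input": bev})
print(pred["output"].shape)            # torch.Size([1, 8, 64, 32])
print(pred["confidence"].shape)        # torch.Size([1, 64, 32]), values in (0, 1)
```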
+ +import torch +from torch.nn.functional import grid_sample + +from utils.geometry import from_homogeneous +from models.utils import make_grid + + +class PolarProjectionDepth(torch.nn.Module): + def __init__(self, z_max, ppm, scale_range, z_min=None): + super().__init__() + self.z_max = z_max + self.Δ = Δ = 1 / ppm + self.z_min = z_min = Δ if z_min is None else z_min + self.scale_range = scale_range + z_steps = torch.arange(z_min, z_max + Δ, Δ) + self.register_buffer("depth_steps", z_steps, persistent=False) + + def sample_depth_scores(self, pixel_scales, camera): + scale_steps = camera.f[..., None, 1] / self.depth_steps.flip(-1) + log_scale_steps = torch.log2(scale_steps) + scale_min, scale_max = self.scale_range + log_scale_norm = (log_scale_steps - scale_min) / (scale_max - scale_min) + log_scale_norm = log_scale_norm * 2 - 1 # in [-1, 1] + + values = pixel_scales.flatten(1, 2).unsqueeze(-1) + indices = log_scale_norm.unsqueeze(-1) + indices = torch.stack([torch.zeros_like(indices), indices], -1) + depth_scores = grid_sample(values, indices, align_corners=True) + depth_scores = depth_scores.reshape( + pixel_scales.shape[:-1] + (len(self.depth_steps),) + ) + return depth_scores + + def forward( + self, + image, + pixel_scales, + camera, + return_total_score=False, + ): + depth_scores = self.sample_depth_scores(pixel_scales, camera) + depth_prob = torch.softmax(depth_scores, dim=1) + image_polar = torch.einsum("...dhw,...hwz->...dzw", image, depth_prob) + if return_total_score: + cell_score = torch.logsumexp(depth_scores, dim=1, keepdim=True) + return image_polar, cell_score.squeeze(1) + return image_polar + + +class CartesianProjection(torch.nn.Module): + def __init__(self, z_max, x_max, ppm, z_min=None): + super().__init__() + self.z_max = z_max + self.x_max = x_max + self.Δ = Δ = 1 / ppm + self.z_min = z_min = Δ if z_min is None else z_min + + grid_xz = make_grid( + x_max * 2 + Δ, z_max, step_y=Δ, step_x=Δ, orig_y=Δ, orig_x=-x_max, y_up=True + ) + self.register_buffer("grid_xz", grid_xz, persistent=False) + + def grid_to_polar(self, cam): + f, c = cam.f[..., 0][..., None, None], cam.c[..., 0][..., None, None] + u = from_homogeneous(self.grid_xz).squeeze(-1) * f + c + z_idx = (self.grid_xz[..., 1] - self.z_min) / self.Δ # convert z value to index + z_idx = z_idx[None].expand_as(u) + grid_polar = torch.stack([u, z_idx], -1) + return grid_polar + + def sample_from_polar(self, image_polar, valid_polar, grid_uz): + size = grid_uz.new_tensor(image_polar.shape[-2:][::-1]) + grid_uz_norm = (grid_uz + 0.5) / size * 2 - 1 + grid_uz_norm = grid_uz_norm * grid_uz.new_tensor([1, -1]) # y axis is up + image_bev = grid_sample(image_polar, grid_uz_norm, align_corners=False) + + if valid_polar is None: + valid = torch.ones_like(image_polar[..., :1, :, :]) + else: + valid = valid_polar.to(image_polar)[:, None] + valid = grid_sample(valid, grid_uz_norm, align_corners=False) + valid = valid.squeeze(1) > (1 - 1e-4) + + return image_bev, valid + + def forward(self, image_polar, valid_polar, cam): + grid_uz = self.grid_to_polar(cam) + image, valid = self.sample_from_polar(image_polar, valid_polar, grid_uz) + return image, valid, grid_uz diff --git a/models/feature_extractor.py b/models/feature_extractor.py new file mode 100644 index 0000000000000000000000000000000000000000..1729f4668d9f983ccbad6db2d4faa98e9a4f0915 --- /dev/null +++ b/models/feature_extractor.py @@ -0,0 +1,232 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. 
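`CartesianProjection.grid_to_polar` maps each BEV cell (x, z) to a polar sample location (u, z-index) through the pinhole relation u = f·x/z + c, which `sample_from_polar` then feeds to `grid_sample`. A small numeric sketch of that mapping with made-up intrinsics and grid resolution (not the repo API, just the arithmetic):

```python
import torch

f, c = 256.0, 128.0    # illustrative focal length and principal point (pixels)
ppm = 2.0              # pixels per meter of the BEV grid
delta = 1.0 / ppm      # BEV cell size in meters (the Δ above)
z_min = delta

# A few BEV cells: x to the right, z forward, both in meters.
grid_xz = torch.tensor([[-2.0, 4.0], [0.0, 4.0], [2.0, 8.0]])
x, z = grid_xz[:, 0], grid_xz[:, 1]

u = f * (x / z) + c            # image column of each BEV cell (from_homogeneous * f + c)
z_idx = (z - z_min) / delta    # index along the polar depth axis

print(torch.stack([u, z_idx], -1))
# tensor([[  0.,   7.],
#         [128.,   7.],
#         [192.,  15.]])
```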
+ +# Adapted from PixLoc, Paul-Edouard Sarlin, ETH Zurich +# https://github.com/cvg/pixloc +# Released under the Apache License 2.0 + +""" +Flexible UNet model which takes any Torchvision backbone as encoder. +Predicts multi-level feature and makes sure that they are well aligned. +""" + +import torch +import torch.nn as nn +import torchvision + +from .base import BaseModel +from .utils import checkpointed + + +class DecoderBlock(nn.Module): + def __init__( + self, previous, skip, out, num_convs=1, norm=nn.BatchNorm2d, padding="zeros" + ): + super().__init__() + + self.upsample = nn.Upsample( + scale_factor=2, mode="bilinear", align_corners=False + ) + + layers = [] + for i in range(num_convs): + conv = nn.Conv2d( + previous + skip if i == 0 else out, + out, + kernel_size=3, + padding=1, + bias=norm is None, + padding_mode=padding, + ) + layers.append(conv) + if norm is not None: + layers.append(norm(out)) + layers.append(nn.ReLU(inplace=True)) + self.layers = nn.Sequential(*layers) + + def forward(self, previous, skip): + upsampled = self.upsample(previous) + # If the shape of the input map `skip` is not a multiple of 2, + # it will not match the shape of the upsampled map `upsampled`. + # If the downsampling uses ceil_mode=False, we nedd to crop `skip`. + # If it uses ceil_mode=True (not supported here), we should pad it. + _, _, hu, wu = upsampled.shape + _, _, hs, ws = skip.shape + assert (hu <= hs) and (wu <= ws), "Using ceil_mode=True in pooling?" + # assert (hu == hs) and (wu == ws), 'Careful about padding' + skip = skip[:, :, :hu, :wu] + return self.layers(torch.cat([upsampled, skip], dim=1)) + + +class AdaptationBlock(nn.Sequential): + def __init__(self, inp, out): + conv = nn.Conv2d(inp, out, kernel_size=1, padding=0, bias=True) + super().__init__(conv) + + +class FeatureExtractor(BaseModel): + default_conf = { + "pretrained": True, + "input_dim": 3, + "output_scales": [0, 2, 4], # what scales to adapt and output + "output_dim": 128, # # of channels in output feature maps + "encoder": "vgg16", # string (torchvision net) or list of channels + "num_downsample": 4, # how many downsample block (if VGG-style net) + "decoder": [64, 64, 64, 64], # list of channels of decoder + "decoder_norm": "nn.BatchNorm2d", # normalization ind decoder blocks + "do_average_pooling": False, + "checkpointed": False, # whether to use gradient checkpointing + "padding": "zeros" + } + mean = [0.485, 0.456, 0.406] + std = [0.229, 0.224, 0.225] + + def build_encoder(self, conf): + assert isinstance(conf.encoder, str) + if conf.pretrained: + assert conf.input_dim == 3 + Encoder = getattr(torchvision.models, conf.encoder) + encoder = Encoder(weights="DEFAULT" if conf.pretrained else None) + Block = checkpointed(torch.nn.Sequential, do=conf.checkpointed) + assert max(conf.output_scales) <= conf.num_downsample + + if conf.encoder.startswith("vgg"): + # Parse the layers and pack them into downsampling blocks + # It's easy for VGG-style nets because of their linear structure. 
+ # This does not handle strided convs and residual connections + skip_dims = [] + previous_dim = None + blocks = [[]] + for i, layer in enumerate(encoder.features): + if isinstance(layer, torch.nn.Conv2d): + # Change the first conv layer if the input dim mismatches + if i == 0 and conf.input_dim != layer.in_channels: + args = {k: getattr(layer, k) for k in layer.__constants__} + args.pop("output_padding") + layer = torch.nn.Conv2d( + **{**args, "in_channels": conf.input_dim} + ) + previous_dim = layer.out_channels + elif isinstance(layer, torch.nn.MaxPool2d): + assert previous_dim is not None + skip_dims.append(previous_dim) + if (conf.num_downsample + 1) == len(blocks): + break + blocks.append([]) # start a new block + if conf.do_average_pooling: + assert layer.dilation == 1 + layer = torch.nn.AvgPool2d( + kernel_size=layer.kernel_size, + stride=layer.stride, + padding=layer.padding, + ceil_mode=layer.ceil_mode, + count_include_pad=False, + ) + blocks[-1].append(layer) + encoder = [Block(*b) for b in blocks] + elif conf.encoder.startswith("resnet"): + # Manually define the ResNet blocks such that the downsampling comes first + assert conf.encoder[len("resnet") :] in ["18", "34", "50", "101"] + assert conf.input_dim == 3, "Unsupported for now." + block1 = torch.nn.Sequential(encoder.conv1, encoder.bn1, encoder.relu) + block2 = torch.nn.Sequential(encoder.maxpool, encoder.layer1) + block3 = encoder.layer2 + block4 = encoder.layer3 + block5 = encoder.layer4 + blocks = [block1, block2, block3, block4, block5] + # Extract the output dimension of each block + skip_dims = [encoder.conv1.out_channels] + for i in range(1, 5): + modules = getattr(encoder, f"layer{i}")[-1]._modules + conv = sorted(k for k in modules if k.startswith("conv"))[-1] + skip_dims.append(modules[conv].out_channels) + # Add a dummy block such that the first one does not downsample + encoder = [torch.nn.Identity()] + [Block(b) for b in blocks] + skip_dims = [3] + skip_dims + # Trim based on the requested encoder size + encoder = encoder[: conf.num_downsample + 1] + skip_dims = skip_dims[: conf.num_downsample + 1] + else: + raise NotImplementedError(conf.encoder) + + assert (conf.num_downsample + 1) == len(encoder) + encoder = nn.ModuleList(encoder) + + return encoder, skip_dims + + def _init(self, conf): + # Encoder + self.encoder, skip_dims = self.build_encoder(conf) + self.skip_dims = skip_dims + + def update_padding(module): + if isinstance(module, nn.Conv2d): + module.padding_mode = conf.padding + + if conf.padding != "zeros": + self.encoder.apply(update_padding) + + # Decoder + if conf.decoder is not None: + assert len(conf.decoder) == (len(skip_dims) - 1) + Block = checkpointed(DecoderBlock, do=conf.checkpointed) + norm = eval(conf.decoder_norm) if conf.decoder_norm else None # noqa + + previous = skip_dims[-1] + decoder = [] + for out, skip in zip(conf.decoder, skip_dims[:-1][::-1]): + decoder.append( + Block(previous, skip, out, norm=norm, padding=conf.padding) + ) + previous = out + self.decoder = nn.ModuleList(decoder) + + # Adaptation layers + adaptation = [] + for idx, i in enumerate(conf.output_scales): + if conf.decoder is None or i == (len(self.encoder) - 1): + input_ = skip_dims[i] + else: + input_ = conf.decoder[-1 - i] + + # out_dim can be an int (same for all scales) or a list (per scale) + dim = conf.output_dim + if not isinstance(dim, int): + dim = dim[idx] + + block = AdaptationBlock(input_, dim) + adaptation.append(block) + self.adaptation = nn.ModuleList(adaptation) + self.scales = [2**s for s in 
conf.output_scales] + + def _forward(self, data): + image = data["image"] + if self.conf.pretrained: + mean, std = image.new_tensor(self.mean), image.new_tensor(self.std) + image = (image - mean[:, None, None]) / std[:, None, None] + + skip_features = [] + features = image + for block in self.encoder: + # print("block features",block) + features = block(features) + skip_features.append(features) + + if self.conf.decoder: + pre_features = [skip_features[-1]] + for block, skip in zip(self.decoder, skip_features[:-1][::-1]): + pre_features.append(block(pre_features[-1], skip)) + pre_features = pre_features[::-1] # fine to coarse + else: + pre_features = skip_features + + out_features = [] + for adapt, i in zip(self.adaptation, self.conf.output_scales): + out_features.append(adapt(pre_features[i])) + pred = {"feature_maps": out_features, "skip_features": skip_features} + return pred + + def loss(self, pred, data): + raise NotImplementedError + + def metrics(self, pred, data): + raise NotImplementedError diff --git a/models/feature_extractor_v2.py b/models/feature_extractor_v2.py new file mode 100644 index 0000000000000000000000000000000000000000..ee238c8aa12cc0dd47431dea87e28a96622a9128 --- /dev/null +++ b/models/feature_extractor_v2.py @@ -0,0 +1,194 @@ +import logging + +import numpy as np +import torch +import torch.nn as nn +import torchvision +from torchvision.models.feature_extraction import create_feature_extractor + +from .base import BaseModel + +logger = logging.getLogger(__name__) + + +class DecoderBlock(nn.Module): + def __init__( + self, previous, out, ksize=3, num_convs=1, norm=nn.BatchNorm2d, padding="zeros" + ): + super().__init__() + layers = [] + for i in range(num_convs): + conv = nn.Conv2d( + previous if i == 0 else out, + out, + kernel_size=ksize, + padding=ksize // 2, + bias=norm is None, + padding_mode=padding, + ) + layers.append(conv) + if norm is not None: + layers.append(norm(out)) + layers.append(nn.ReLU(inplace=True)) + self.layers = nn.Sequential(*layers) + + def forward(self, previous, skip): + _, _, hp, wp = previous.shape + _, _, hs, ws = skip.shape + scale = 2 ** np.round(np.log2(np.array([hs / hp, ws / wp]))) + upsampled = nn.functional.interpolate( + previous, scale_factor=scale.tolist(), mode="bilinear", align_corners=False + ) + # If the shape of the input map `skip` is not a multiple of 2, + # it will not match the shape of the upsampled map `upsampled`. + # If the downsampling uses ceil_mode=False, we nedd to crop `skip`. + # If it uses ceil_mode=True (not supported here), we should pad it. 
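The flexible UNet in `models/feature_extractor.py` returns one adapted feature map per entry of `output_scales`. A shape-check sketch with the VGG-16 defaults, assuming the repository root is on `PYTHONPATH`; `pretrained` is disabled only so the sketch runs without downloading weights:

```python
import torch
from models.feature_extractor import FeatureExtractor  # assumes repo root on PYTHONPATH

extractor = FeatureExtractor({
    "encoder": "vgg16",
    "pretrained": False,          # only to keep the sketch download-free
    "num_downsample": 4,
    "decoder": [64, 64, 64, 64],
    "output_scales": [0, 2, 4],
    "output_dim": 128,
})

pred = extractor({"image": torch.rand(1, 3, 256, 256)})
for scale, fmap in zip(extractor.scales, pred["feature_maps"]):
    print(scale, tuple(fmap.shape))
# 1  (1, 128, 256, 256)
# 4  (1, 128, 64, 64)
# 16 (1, 128, 16, 16)
```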
+ _, _, hu, wu = upsampled.shape + _, _, hs, ws = skip.shape + if (hu <= hs) and (wu <= ws): + skip = skip[:, :, :hu, :wu] + elif (hu >= hs) and (wu >= ws): + skip = nn.functional.pad(skip, [0, wu - ws, 0, hu - hs]) + else: + raise ValueError( + f"Inconsistent skip vs upsampled shapes: {(hs, ws)}, {(hu, wu)}" + ) + + return self.layers(skip) + upsampled + + +class FPN(nn.Module): + def __init__(self, in_channels_list, out_channels, **kw): + super().__init__() + self.first = nn.Conv2d( + in_channels_list[-1], out_channels, 1, padding=0, bias=True + ) + self.blocks = nn.ModuleList( + [ + DecoderBlock(c, out_channels, ksize=1, **kw) + for c in in_channels_list[::-1][1:] + ] + ) + self.out = nn.Sequential( + nn.Conv2d(out_channels, out_channels, 3, padding=1, bias=False), + nn.BatchNorm2d(out_channels), + nn.ReLU(inplace=True), + ) + + def forward(self, layers): + feats = None + for idx, x in enumerate(reversed(layers.values())): + if feats is None: + feats = self.first(x) + else: + feats = self.blocks[idx - 1](feats, x) + out = self.out(feats) + return out + + +def remove_conv_stride(conv): + conv_new = nn.Conv2d( + conv.in_channels, + conv.out_channels, + conv.kernel_size, + bias=conv.bias is not None, + stride=1, + padding=conv.padding, + ) + conv_new.weight = conv.weight + conv_new.bias = conv.bias + return conv_new + + +class FeatureExtractor(BaseModel): + default_conf = { + "pretrained": True, + "input_dim": 3, + "output_dim": 128, # # of channels in output feature maps + "encoder": "resnet50", # torchvision net as string + "remove_stride_from_first_conv": False, + "num_downsample": None, # how many downsample block + "decoder_norm": "nn.BatchNorm2d", # normalization ind decoder blocks + "do_average_pooling": False, + "checkpointed": False, # whether to use gradient checkpointing + } + mean = [0.485, 0.456, 0.406] + std = [0.229, 0.224, 0.225] + + def build_encoder(self, conf): + assert isinstance(conf.encoder, str) + if conf.pretrained: + assert conf.input_dim == 3 + Encoder = getattr(torchvision.models, conf.encoder) + + kw = {} + if conf.encoder.startswith("resnet"): + layers = ["relu", "layer1", "layer2", "layer3", "layer4"] + kw["replace_stride_with_dilation"] = [False, False, False] + elif conf.encoder == "vgg13": + layers = [ + "features.3", + "features.8", + "features.13", + "features.18", + "features.23", + ] + elif conf.encoder == "vgg16": + layers = [ + "features.3", + "features.8", + "features.15", + "features.22", + "features.29", + ] + else: + raise NotImplementedError(conf.encoder) + + if conf.num_downsample is not None: + layers = layers[: conf.num_downsample] + encoder = Encoder(weights="DEFAULT" if conf.pretrained else None, **kw) + encoder = create_feature_extractor(encoder, return_nodes=layers) + if conf.encoder.startswith("resnet") and conf.remove_stride_from_first_conv: + encoder.conv1 = remove_conv_stride(encoder.conv1) + + if conf.do_average_pooling: + raise NotImplementedError + if conf.checkpointed: + raise NotImplementedError + + return encoder, layers + + def _init(self, conf): + # Preprocessing + self.register_buffer("mean_", torch.tensor(self.mean), persistent=False) + self.register_buffer("std_", torch.tensor(self.std), persistent=False) + + # Encoder + self.encoder, self.layers = self.build_encoder(conf) + s = 128 + inp = torch.zeros(1, 3, s, s) + features = list(self.encoder(inp).values()) + self.skip_dims = [x.shape[1] for x in features] + self.layer_strides = [s / f.shape[-1] for f in features] + self.scales = [self.layer_strides[0]] + + # Decoder + 
norm = eval(conf.decoder_norm) if conf.decoder_norm else None # noqa + self.decoder = FPN(self.skip_dims, out_channels=conf.output_dim, norm=norm) + + logger.debug( + "Built feature extractor with layers {name:dim:stride}:\n" + f"{list(zip(self.layers, self.skip_dims, self.layer_strides))}\n" + f"and output scales {self.scales}." + ) + + def _forward(self, data): + image = data["image"] + image = (image - self.mean_[:, None, None]) / self.std_[:, None, None] + + skip_features = self.encoder(image) + output = self.decoder(skip_features) + pred = {"feature_maps": [output], "skip_features": skip_features} + return pred +if __name__ == '__main__': + pass \ No newline at end of file diff --git a/models/feature_extractor_v3.py b/models/feature_extractor_v3.py new file mode 100644 index 0000000000000000000000000000000000000000..3ab0d21491629639e37c6e48c156ef58fa0d8c37 --- /dev/null +++ b/models/feature_extractor_v3.py @@ -0,0 +1,214 @@ +import logging + +import numpy as np +import torch +import torch.nn as nn +import torchvision +from torchvision.models.feature_extraction import create_feature_extractor + +from .base import BaseModel + +logger = logging.getLogger(__name__) + + +class DecoderBlock(nn.Module): + def __init__( + self, previous, out, ksize=3, num_convs=1, norm=nn.BatchNorm2d, padding="zeros" + ): + super().__init__() + layers = [] + for i in range(num_convs): + conv = nn.Conv2d( + previous if i == 0 else out, + out, + kernel_size=ksize, + padding=ksize // 2, + bias=norm is None, + padding_mode=padding, + ) + layers.append(conv) + if norm is not None: + layers.append(norm(out)) + layers.append(nn.ReLU(inplace=True)) + self.layers = nn.Sequential(*layers) + + def forward(self, previous, skip): + _, _, hp, wp = previous.shape + _, _, hs, ws = skip.shape + scale = 2 ** np.round(np.log2(np.array([hs / hp, ws / wp]))) + upsampled = nn.functional.interpolate( + previous, scale_factor=scale.tolist(), mode="bilinear", align_corners=False + ) + # If the shape of the input map `skip` is not a multiple of 2, + # it will not match the shape of the upsampled map `upsampled`. + # If the downsampling uses ceil_mode=False, we nedd to crop `skip`. + # If it uses ceil_mode=True (not supported here), we should pad it. 
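Unlike the multi-scale UNet above, the v2 extractor fuses all ResNet stages through the `FPN` decoder into a single map at the stride of the finest extracted layer. A quick shape check under the same assumptions (repo root importable, `pretrained` off only to avoid a weight download):

```python
import torch
from models.feature_extractor_v2 import FeatureExtractor  # assumes repo root on PYTHONPATH

extractor = FeatureExtractor({
    "encoder": "resnet50",
    "pretrained": False,   # only to keep the sketch download-free
    "output_dim": 8,
})

pred = extractor({"image": torch.rand(1, 3, 256, 256)})
fused = pred["feature_maps"][0]
print(extractor.scales, tuple(fused.shape))
# [2.0] (1, 8, 128, 128): one fused FPN map at the stride of the finest ResNet layer
```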
+ _, _, hu, wu = upsampled.shape + _, _, hs, ws = skip.shape + if (hu <= hs) and (wu <= ws): + skip = skip[:, :, :hu, :wu] + elif (hu >= hs) and (wu >= ws): + skip = nn.functional.pad(skip, [0, wu - ws, 0, hu - hs]) + else: + raise ValueError( + f"Inconsistent skip vs upsampled shapes: {(hs, ws)}, {(hu, wu)}" + ) + + return self.layers(skip) + upsampled + + +class FPN(nn.Module): + def __init__(self, in_channels_list, out_channels, **kw): + super().__init__() + self.first = nn.Conv2d( + in_channels_list[-1], out_channels, 1, padding=0, bias=True + ) + self.blocks = nn.ModuleList( + [ + DecoderBlock(c, out_channels, ksize=1, **kw) + for c in in_channels_list[::-1][1:] + ] + ) + self.out = nn.Sequential( + nn.Conv2d(out_channels, out_channels, 3, padding=1, bias=False), + nn.BatchNorm2d(out_channels), + nn.ReLU(inplace=True), + ) + + def forward(self, layers): + feats = None + for idx, x in enumerate(reversed(layers.values())): + if feats is None: + feats = self.first(x) + else: + feats = self.blocks[idx - 1](feats, x) + out = self.out(feats) + return out + + +def remove_conv_stride(conv): + conv_new = nn.Conv2d( + conv.in_channels, + conv.out_channels, + conv.kernel_size, + bias=conv.bias is not None, + stride=1, + padding=conv.padding, + ) + conv_new.weight = conv.weight + conv_new.bias = conv.bias + return conv_new + +class DoubleConv(nn.Module): + def __init__(self, in_channels, out_channels): + super(DoubleConv, self).__init__() + self.double_conv = nn.Sequential( + nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1), + nn.ReLU(inplace=True), + nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1), + nn.ReLU(inplace=True) + ) + + def forward(self, x): + return self.double_conv(x) + +class FeatureExtractor(BaseModel): + default_conf = { + "pretrained": True, + "input_dim": 3, + "output_dim": 128, # # of channels in output feature maps + "encoder": "resnet50", # torchvision net as string + "remove_stride_from_first_conv": False, + "num_downsample": None, # how many downsample block + "decoder_norm": "nn.BatchNorm2d", # normalization ind decoder blocks + "do_average_pooling": False, + "checkpointed": False, # whether to use gradient checkpointing + } + mean = [0.485, 0.456, 0.406] + std = [0.229, 0.224, 0.225] + + def build_encoder(self, conf): + assert isinstance(conf.encoder, str) + if conf.pretrained: + assert conf.input_dim == 3 + # Encoder + self.conv1 = self.conv_block(conf.input_dim, 64) + self.pool1 = nn.MaxPool2d(2, 2) + self.conv2 = self.conv_block(64, 128) + self.pool2 = nn.MaxPool2d(2, 2) + self.conv3 = self.conv_block(128, 256) + self.pool3 = nn.MaxPool2d(2, 2) + self.conv4 = self.conv_block(256, 512) + self.pool4 = nn.MaxPool2d(2, 2) + self.conv5 = self.conv_block(512, 1024) + + # Decoder + self.up6 = nn.ConvTranspose2d(1024, 512, 2, stride=2) + self.conv6 = self.conv_block(1024, 512) + self.up7 = nn.ConvTranspose2d(512, 256, 2, stride=2) + self.conv7 = self.conv_block(512, 256) + self.up8 = nn.ConvTranspose2d(256, 128, 2, stride=2) + self.conv8 = self.conv_block(256, 128) + self.up9 = nn.ConvTranspose2d(128, 64, 2, stride=2) + self.conv9 = self.conv_block(128, 64) + self.conv10 = nn.Conv2d(64, conf.output_dim, 1) + + # return encoder, layers + def unet(self,x): + # Encoder + conv1 = self.conv1(x) + pool1 = self.pool1(conv1) + conv2 = self.conv2(pool1) + pool2 = self.pool2(conv2) + conv3 = self.conv3(pool2) + pool3 = self.pool3(conv3) + conv4 = self.conv4(pool3) + pool4 = self.pool4(conv4) + conv5 = self.conv5(pool4) + + # Decoder + up6 = self.up6(conv5) 
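Each decoder step of this UNet (continued just below) transposes-convolves the previous feature to half the channels, concatenates the matching encoder feature, and convolves back down. A self-contained sketch of one such step with illustrative sizes; the plain `Conv2d + ReLU` pair stands in for the batch-normalized `conv_block` used here:

```python
import torch
import torch.nn as nn

# One UNet decoder step: 1024 -> 512 channels via transposed conv, concatenate the
# matching 512-channel encoder feature, then convolve the 1024 channels back to 512.
up6 = nn.ConvTranspose2d(1024, 512, 2, stride=2)
conv6 = nn.Sequential(nn.Conv2d(1024, 512, 3, padding=1), nn.ReLU(inplace=True))

conv5 = torch.randn(1, 1024, 16, 16)   # bottleneck feature (illustrative size)
conv4 = torch.randn(1, 512, 32, 32)    # matching encoder feature

x = up6(conv5)                    # (1, 512, 32, 32)
x = torch.cat([x, conv4], dim=1)  # (1, 1024, 32, 32)
x = conv6(x)                      # (1, 512, 32, 32)
print(x.shape)
```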
+ concat6 = torch.cat([up6, conv4], dim=1) + conv6 = self.conv6(concat6) + up7 = self.up7(conv6) + concat7 = torch.cat([up7, conv3], dim=1) + conv7 = self.conv7(concat7) + up8 = self.up8(conv7) + concat8 = torch.cat([up8, conv2], dim=1) + conv8 = self.conv8(concat8) + up9 = self.up9(conv8) + concat9 = torch.cat([up9, conv1], dim=1) + conv9 = self.conv9(concat9) + output = self.conv10(conv9) + + return output + + def conv_block(self, in_channels, out_channels): + return nn.Sequential( + nn.Conv2d(in_channels, out_channels, 3, padding=1), + nn.BatchNorm2d(out_channels), + nn.ReLU(inplace=True), + nn.Conv2d(out_channels, out_channels, 3, padding=1), + nn.BatchNorm2d(out_channels), + nn.ReLU(inplace=True) + ) + def _init(self, conf): + # Preprocessing + self.register_buffer("mean_", torch.tensor(self.mean), persistent=False) + self.register_buffer("std_", torch.tensor(self.std), persistent=False) + + # Encoder + self.build_encoder(conf) + + + def _forward(self, data): + image = data["image"] + image = (image - self.mean_[:, None, None]) / self.std_[:, None, None] + + output = self.unet(image) + # output = self.decoder(skip_features) + + pred = {"feature_maps": [output]} + return pred +if __name__ == '__main__': + model=FeatureExtractor() \ No newline at end of file diff --git a/models/feature_extractor_v4.py b/models/feature_extractor_v4.py new file mode 100644 index 0000000000000000000000000000000000000000..9efab6ab751f09cc78d084dd54804dbc1a9364a6 --- /dev/null +++ b/models/feature_extractor_v4.py @@ -0,0 +1,100 @@ +import logging + +import numpy as np +import torch +import torch.nn as nn +import torchvision +from torchvision.models.feature_extraction import create_feature_extractor +import feature_extractor_models as smp +import torch +from .base import BaseModel + +logger = logging.getLogger(__name__) + + + +class FeatureExtractor(BaseModel): + default_conf = { + "pretrained": True, + "input_dim": 3, + "output_dim": 128, # # of channels in output feature maps + "encoder": "resnet50", # torchvision net as string + "remove_stride_from_first_conv": False, + "num_downsample": None, # how many downsample block + "decoder_norm": "nn.BatchNorm2d", # normalization ind decoder blocks + "do_average_pooling": False, + "checkpointed": False, # whether to use gradient checkpointing + "architecture":"FPN" + } + mean = [0.485, 0.456, 0.406] + std = [0.229, 0.224, 0.225] + # self.fmodel=None + + def build_encoder(self, conf): + assert isinstance(conf.encoder, str) + if conf.pretrained: + assert conf.input_dim == 3 + + + # return encoder, layers + + + + def _init(self, conf): + # Preprocessing + self.register_buffer("mean_", torch.tensor(self.mean), persistent=False) + self.register_buffer("std_", torch.tensor(self.std), persistent=False) + + if conf.architecture=="FPN": + # Encoder + self.fmodel = smp.FPN( + encoder_name=conf.encoder, # choose encoder, e.g. mobilenet_v2 or efficientnet-b7 + encoder_weights="imagenet", # use `imagenet` pre-trained weights for encoder initialization + in_channels=conf.input_dim, # model input channels (1 for gray-scale images, 3 for RGB, etc.) + classes=conf.output_dim, # model output channels (number of classes in your dataset) + upsampling=2, # optional, final output upsampling, default is 8 + activation=None + ) + elif conf.architecture == "LightFPN": + self.fmodel = smp.L( + encoder_name=conf.encoder, # choose encoder, e.g. 
mobilenet_v2 or efficientnet-b7 + encoder_weights="imagenet", # use `imagenet` pre-trained weights for encoder initialization + in_channels=conf.input_dim, # model input channels (1 for gray-scale images, 3 for RGB, etc.) + classes=conf.output_dim, # model output channels (number of classes in your dataset) + upsampling=2, # optional, final output upsampling, default is 8 + activation=None + ) + elif conf.architecture=="PSP": + + self.fmodel =smp.PSPNet( + encoder_name=conf.encoder, # choose encoder, e.g. mobilenet_v2 or efficientnet-b7 + encoder_weights="imagenet", # use `imagenet` pre-trained weights for encoder initialization + in_channels=conf.input_dim, # model input channels (1 for gray-scale images, 3 for RGB, etc.) + classes=conf.output_dim, # model output channels (number of classes in your dataset) + upsampling=4, # optional, final output upsampling, default is 8 + activation=None + ) + else: + raise ValueError("Only FPN") + # elif conf.architecture=="Unet": + # self.fmodel = smp.FPN( + # encoder_name=conf.encoder, # choose encoder, e.g. mobilenet_v2 or efficientnet-b7 + # encoder_weights="imagenet", # use `imagenet` pre-trained weights for encoder initialization + # in_channels=conf.input_dim, # model input channels (1 for gray-scale images, 3 for RGB, etc.) + # classes=conf.output_dim, # model output channels (number of classes in your dataset) + # # upsampling=int(conf.upsampling), # optional, final output upsampling, default is 8 + # activation="relu" + # ) + + + def _forward(self, data): + image = data["image"] + image = (image - self.mean_[:, None, None]) / self.std_[:, None, None] + + output = self.fmodel(image) + # output = self.decoder(skip_features) + + pred = {"feature_maps": [output]} + return pred +if __name__ == '__main__': + model=FeatureExtractor() \ No newline at end of file diff --git a/models/feature_extractor_v5.py b/models/feature_extractor_v5.py new file mode 100644 index 0000000000000000000000000000000000000000..1c6c41ad420c471be6a5e36cc0eb3fcc2e188792 --- /dev/null +++ b/models/feature_extractor_v5.py @@ -0,0 +1,423 @@ +import logging + +import numpy as np +import torch +import torch.nn as nn +import torchvision +from torchvision.models.feature_extraction import create_feature_extractor +import feature_extractor_models as smp +import torch +from .base import BaseModel + +logger = logging.getLogger(__name__) + +import math +import torch +import numpy as np +import torch.nn as nn +import torch.nn.functional as F +from torch.nn import init +from collections import OrderedDict + +import torch.distributed as dist + + +def get_batch_norm(inplace=False): + if dist.is_available() and dist.is_initialized(): # 检查是否在分布式环境中 + return nn.SyncBatchNorm + else: + return nn.BatchNorm2d + + +BatchNorm2d = get_batch_norm() +bn_mom = 0.1 + + +def conv3x3(in_planes, out_planes, stride=1): + """3x3 convolution with padding""" + return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, + padding=1, bias=False) + + +class BasicBlock(nn.Module): + expansion = 1 + + def __init__(self, inplanes, planes, stride=1, downsample=None, no_relu=False): + super(BasicBlock, self).__init__() + self.conv1 = conv3x3(inplanes, planes, stride) + self.bn1 = BatchNorm2d(planes, momentum=bn_mom) + self.relu = nn.ReLU(inplace=True) + self.conv2 = conv3x3(planes, planes) + self.bn2 = BatchNorm2d(planes, momentum=bn_mom) + self.downsample = downsample + self.stride = stride + self.no_relu = no_relu + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out 
= self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + + if self.no_relu: + return out + else: + return self.relu(out) + + +class Bottleneck(nn.Module): + expansion = 2 + + def __init__(self, inplanes, planes, stride=1, downsample=None, no_relu=True): + super(Bottleneck, self).__init__() + self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) + self.bn1 = BatchNorm2d(planes, momentum=bn_mom) + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, + padding=1, bias=False) + self.bn2 = BatchNorm2d(planes, momentum=bn_mom) + self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, + bias=False) + self.bn3 = BatchNorm2d(planes * self.expansion, momentum=bn_mom) + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + self.no_relu = no_relu + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + if self.no_relu: + return out + else: + return self.relu(out) + + +class DAPPM(nn.Module): + def __init__(self, inplanes, branch_planes, outplanes): + super(DAPPM, self).__init__() + self.scale1 = nn.Sequential(nn.AvgPool2d(kernel_size=5, stride=2, padding=2), + BatchNorm2d(inplanes, momentum=bn_mom), + nn.ReLU(inplace=True), + nn.Conv2d(inplanes, branch_planes, kernel_size=1, bias=False), + ) + self.scale2 = nn.Sequential(nn.AvgPool2d(kernel_size=9, stride=4, padding=4), + BatchNorm2d(inplanes, momentum=bn_mom), + nn.ReLU(inplace=True), + nn.Conv2d(inplanes, branch_planes, kernel_size=1, bias=False), + ) + self.scale3 = nn.Sequential(nn.AvgPool2d(kernel_size=17, stride=8, padding=8), + BatchNorm2d(inplanes, momentum=bn_mom), + nn.ReLU(inplace=True), + nn.Conv2d(inplanes, branch_planes, kernel_size=1, bias=False), + ) + self.scale4 = nn.Sequential(nn.AdaptiveAvgPool2d((1, 1)), + BatchNorm2d(inplanes, momentum=bn_mom), + nn.ReLU(inplace=True), + nn.Conv2d(inplanes, branch_planes, kernel_size=1, bias=False), + ) + self.scale0 = nn.Sequential( + BatchNorm2d(inplanes, momentum=bn_mom), + nn.ReLU(inplace=True), + nn.Conv2d(inplanes, branch_planes, kernel_size=1, bias=False), + ) + self.process1 = nn.Sequential( + BatchNorm2d(branch_planes, momentum=bn_mom), + nn.ReLU(inplace=True), + nn.Conv2d(branch_planes, branch_planes, kernel_size=3, padding=1, bias=False), + ) + self.process2 = nn.Sequential( + BatchNorm2d(branch_planes, momentum=bn_mom), + nn.ReLU(inplace=True), + nn.Conv2d(branch_planes, branch_planes, kernel_size=3, padding=1, bias=False), + ) + self.process3 = nn.Sequential( + BatchNorm2d(branch_planes, momentum=bn_mom), + nn.ReLU(inplace=True), + nn.Conv2d(branch_planes, branch_planes, kernel_size=3, padding=1, bias=False), + ) + self.process4 = nn.Sequential( + BatchNorm2d(branch_planes, momentum=bn_mom), + nn.ReLU(inplace=True), + nn.Conv2d(branch_planes, branch_planes, kernel_size=3, padding=1, bias=False), + ) + self.compression = nn.Sequential( + BatchNorm2d(branch_planes * 5, momentum=bn_mom), + nn.ReLU(inplace=True), + nn.Conv2d(branch_planes * 5, outplanes, kernel_size=1, bias=False), + ) + self.shortcut = nn.Sequential( + BatchNorm2d(inplanes, momentum=bn_mom), + nn.ReLU(inplace=True), + nn.Conv2d(inplanes, outplanes, kernel_size=1, bias=False), + ) + + def 
forward(self, x): + # x = self.downsample(x) + width = x.shape[-1] + height = x.shape[-2] + x_list = [] + + x_list.append(self.scale0(x)) + x_list.append(self.process1((F.interpolate(self.scale1(x), + size=[height, width], + mode='bilinear') + x_list[0]))) + x_list.append((self.process2((F.interpolate(self.scale2(x), + size=[height, width], + mode='bilinear') + x_list[1])))) + x_list.append(self.process3((F.interpolate(self.scale3(x), + size=[height, width], + mode='bilinear') + x_list[2]))) + x_list.append(self.process4((F.interpolate(self.scale4(x), + size=[height, width], + mode='bilinear') + x_list[3]))) + + out = self.compression(torch.cat(x_list, 1)) + self.shortcut(x) + return out + + +class segmenthead(nn.Module): + + def __init__(self, inplanes, interplanes, outplanes, scale_factor=None): + super(segmenthead, self).__init__() + self.bn1 = BatchNorm2d(inplanes, momentum=bn_mom) + self.conv1 = nn.Conv2d(inplanes, interplanes, kernel_size=3, padding=1, bias=False) + self.bn2 = BatchNorm2d(interplanes, momentum=bn_mom) + self.relu = nn.ReLU(inplace=True) + self.conv2 = nn.Conv2d(interplanes, outplanes, kernel_size=1, padding=0, bias=True) + self.scale_factor = scale_factor + + def forward(self, x): + x = self.conv1(self.relu(self.bn1(x))) + out = self.conv2(self.relu(self.bn2(x))) + + if self.scale_factor is not None: + height = x.shape[-2] * self.scale_factor + width = x.shape[-1] * self.scale_factor + out = F.interpolate(out, + size=[height, width], + mode='bilinear') + + return out + + +class DualResNet(nn.Module): + + def __init__(self, block, layers, num_classes=19, planes=64, spp_planes=128, head_planes=128, augment=False): + super(DualResNet, self).__init__() + + highres_planes = planes * 2 + self.augment = augment + + self.conv1 = nn.Sequential( + nn.Conv2d(3, planes, kernel_size=3, stride=2, padding=1), + BatchNorm2d(planes, momentum=bn_mom), + nn.ReLU(inplace=True), + nn.Conv2d(planes, planes, kernel_size=3, stride=2, padding=1), + BatchNorm2d(planes, momentum=bn_mom), + nn.ReLU(inplace=True), + ) + + self.relu = nn.ReLU(inplace=False) + self.layer1 = self._make_layer(block, planes, planes, layers[0]) + self.layer2 = self._make_layer(block, planes, planes * 2, layers[1], stride=2) + self.layer3 = self._make_layer(block, planes * 2, planes * 4, layers[2], stride=2) + self.layer4 = self._make_layer(block, planes * 4, planes * 8, layers[3], stride=2) + + self.compression3 = nn.Sequential( + nn.Conv2d(planes * 4, highres_planes, kernel_size=1, bias=False), + BatchNorm2d(highres_planes, momentum=bn_mom), + ) + + self.compression4 = nn.Sequential( + nn.Conv2d(planes * 8, highres_planes, kernel_size=1, bias=False), + BatchNorm2d(highres_planes, momentum=bn_mom), + ) + + self.down3 = nn.Sequential( + nn.Conv2d(highres_planes, planes * 4, kernel_size=3, stride=2, padding=1, bias=False), + BatchNorm2d(planes * 4, momentum=bn_mom), + ) + + self.down4 = nn.Sequential( + nn.Conv2d(highres_planes, planes * 4, kernel_size=3, stride=2, padding=1, bias=False), + BatchNorm2d(planes * 4, momentum=bn_mom), + nn.ReLU(inplace=True), + nn.Conv2d(planes * 4, planes * 8, kernel_size=3, stride=2, padding=1, bias=False), + BatchNorm2d(planes * 8, momentum=bn_mom), + ) + + self.layer3_ = self._make_layer(block, planes * 2, highres_planes, 2) + + self.layer4_ = self._make_layer(block, highres_planes, highres_planes, 2) + + self.layer5_ = self._make_layer(Bottleneck, highres_planes, highres_planes, 1) + + self.layer5 = self._make_layer(Bottleneck, planes * 8, planes * 8, 1, stride=2) + + self.spp = 
DAPPM(planes * 16, spp_planes, planes * 4) + + if self.augment: + self.seghead_extra = segmenthead(highres_planes, head_planes, num_classes) + + self.final_layer = segmenthead(planes * 4, head_planes, num_classes, scale_factor=4) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') + elif isinstance(m, BatchNorm2d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + + def _make_layer(self, block, inplanes, planes, blocks, stride=1): + downsample = None + if stride != 1 or inplanes != planes * block.expansion: + downsample = nn.Sequential( + nn.Conv2d(inplanes, planes * block.expansion, + kernel_size=1, stride=stride, bias=False), + nn.BatchNorm2d(planes * block.expansion, momentum=bn_mom), + ) + + layers = [] + layers.append(block(inplanes, planes, stride, downsample)) + inplanes = planes * block.expansion + for i in range(1, blocks): + if i == (blocks - 1): + layers.append(block(inplanes, planes, stride=1, no_relu=True)) + else: + layers.append(block(inplanes, planes, stride=1, no_relu=False)) + + return nn.Sequential(*layers) + + def forward(self, x): + + width_output = x.shape[-1] // 8 + height_output = x.shape[-2] // 8 + layers = [] + + x = self.conv1(x) + + x = self.layer1(x) + layers.append(x) + + x = self.layer2(self.relu(x)) + layers.append(x) + + x = self.layer3(self.relu(x)) + layers.append(x) + x_ = self.layer3_(self.relu(layers[1])) + + x = x + self.down3(self.relu(x_)) + x_ = x_ + F.interpolate( + self.compression3(self.relu(layers[2])), + size=[height_output, width_output], + mode='bilinear') + if self.augment: + temp = x_ + + x = self.layer4(self.relu(x)) + layers.append(x) + x_ = self.layer4_(self.relu(x_)) + + x = x + self.down4(self.relu(x_)) + x_ = x_ + F.interpolate( + self.compression4(self.relu(layers[3])), + size=[height_output, width_output], + mode='bilinear') + + x_ = self.layer5_(self.relu(x_)) + x = F.interpolate( + self.spp(self.layer5(self.relu(x))), + size=[height_output, width_output], + mode='bilinear') + + x_ = self.final_layer(x + x_) + + if self.augment: + x_extra = self.seghead_extra(temp) + return [x_, x_extra] + else: + return x_ + +class FeatureExtractor(BaseModel): + default_conf = { + "pretrained": True, + "input_dim": 3, + "output_dim": 128, # # of channels in output feature maps + "encoder": "resnet50", # torchvision net as string + "remove_stride_from_first_conv": False, + "num_downsample": None, # how many downsample block + "decoder_norm": "nn.BatchNorm2d", # normalization ind decoder blocks + "do_average_pooling": False, + "checkpointed": False, # whether to use gradient checkpointing + "architecture":"FPN" + } + mean = [0.485, 0.456, 0.406] + std = [0.229, 0.224, 0.225] + # self.fmodel=None + + def build_encoder(self, conf): + assert isinstance(conf.encoder, str) + if conf.pretrained: + assert conf.input_dim == 3 + + + # return encoder, layers + + + + def _init(self, conf): + # Preprocessing + self.register_buffer("mean_", torch.tensor(self.mean), persistent=False) + self.register_buffer("std_", torch.tensor(self.std), persistent=False) + + if conf.architecture=="DDRNet23s": + # Encoder + self.fmodel= DualResNet(BasicBlock, [2, 2, 2, 2], num_classes=conf.output_dim, planes=32, spp_planes=128, head_planes=64, augment=False) + else: + raise ValueError("DDRNet23s") + # elif conf.architecture=="Unet": + # self.fmodel = smp.FPN( + # encoder_name=conf.encoder, # choose encoder, e.g. 
mobilenet_v2 or efficientnet-b7 + # encoder_weights="imagenet", # use `imagenet` pre-trained weights for encoder initialization + # in_channels=conf.input_dim, # model input channels (1 for gray-scale images, 3 for RGB, etc.) + # classes=conf.output_dim, # model output channels (number of classes in your dataset) + # # upsampling=int(conf.upsampling), # optional, final output upsampling, default is 8 + # activation="relu" + # ) + + + def _forward(self, data): + image = data["image"] + image = (image - self.mean_[:, None, None]) / self.std_[:, None, None] + + output = self.fmodel(image) + # output = self.decoder(skip_features) + + pred = {"feature_maps": [output]} + return pred +if __name__ == '__main__': + model=FeatureExtractor() \ No newline at end of file diff --git a/models/map_encoder.py b/models/map_encoder.py new file mode 100644 index 0000000000000000000000000000000000000000..ec7d838122715ecd483e9eac5012ee481614d5b3 --- /dev/null +++ b/models/map_encoder.py @@ -0,0 +1,77 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. + +import torch +import torch.nn as nn + +from .base import BaseModel +from .feature_extractor import FeatureExtractor + + +class MapEncoder(BaseModel): + default_conf = { + "embedding_dim": "???", + "output_dim": None, + "num_classes": "???", + "backbone": "???", + "unary_prior": False, + } + + def _init(self, conf): + self.embeddings = torch.nn.ModuleDict( + { + k: torch.nn.Embedding(n + 1, conf.embedding_dim) + for k, n in conf.num_classes.items() + } + ) + #num_calsses:{'areas': 7, 'ways': 10, 'nodes': 33} + input_dim = len(conf.num_classes) * conf.embedding_dim + output_dim = conf.output_dim + if output_dim is None: + output_dim = conf.backbone.output_dim + if conf.unary_prior: + output_dim += 1 + if conf.backbone is None: + self.encoder = nn.Conv2d(input_dim, output_dim, 1) + elif conf.backbone == "simple": + self.encoder = nn.Sequential( + nn.Conv2d(input_dim, 128, 3, padding=1), + nn.ReLU(inplace=True), + nn.Conv2d(128, 128, 3, padding=1), + nn.ReLU(inplace=True), + nn.Conv2d(128, output_dim, 3, padding=1), + ) + else: + self.encoder = FeatureExtractor( + { + **conf.backbone, + "input_dim": input_dim, + "output_dim": output_dim, + } + ) + + def _forward(self, data): + # embeddings = [ + # self.embeddings[k](data["map"][:, i].cuda()) + # for i, k in enumerate(("areas", "ways", "nodes")) + # ] + embeddings = [ + self.embeddings[k](data["map"][:, i]) + for i, k in enumerate(("areas", "ways", "nodes")) + ] + # for i in embeddings: + # print("embeddings:",i.shape)#[6, 256, 256, 16] + embeddings = torch.cat(embeddings, dim=-1).permute(0, 3, 1, 2)#[6,48, 256, 256] + # print("Old",embeddings.shape) + pass + if isinstance(self.encoder, BaseModel): + # print("encoder BaseModel:") + features = self.encoder({"image": embeddings})["feature_maps"] + else: + # print("encoder NoBaseModel:") + features = [self.encoder(embeddings)] + pred = {} + if self.conf.unary_prior: + pred["log_prior"] = [f[:, -1] for f in features] + features = [f[:, :-1] for f in features] + pred["map_features"] = features + return pred diff --git a/models/map_encoder_single.py b/models/map_encoder_single.py new file mode 100644 index 0000000000000000000000000000000000000000..1ef157355dc2ca4a1544dddc9ba4a522a7ffa607 --- /dev/null +++ b/models/map_encoder_single.py @@ -0,0 +1,213 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. 
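+# Single-table variant of MapEncoder: instead of one embedding per semantic
+# layer, batch_process() below collapses the (areas, ways, nodes) index maps
+# into a single index map, offsetting ways by +33 and areas by +43 and keeping
+# the highest-priority non-zero class per pixel (nodes > ways > areas).
+# Accordingly, conf.num_classes is expected to hold a single "all" entry that
+# covers the combined index range.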
+ +import torch +import torch.nn as nn + +from .base import BaseModel +from .feature_extractor import FeatureExtractor +import numpy as np +# from .embeddings import AttentionWeightedEmbedding, + +import torch +import torch.nn as nn +import torch.nn.functional as F + +import torch.nn.functional as F + +class ImprovedAttentionEmbedding(nn.Module): + def __init__(self, num_embeddings, embedding_dim, weight_dim=1, dropout=0.1, weight_init='normal'): + super(ImprovedAttentionEmbedding, self).__init__() + self.embedding = nn.Embedding(num_embeddings, embedding_dim) + self.weight_dim = weight_dim + + # 可学习的权重矩阵 [num_embeddings, weight_dim] + if weight_init == 'normal': + self.weights = nn.Parameter(torch.randn(num_embeddings, weight_dim)) + elif weight_init == 'uniform': + self.weights = nn.Parameter(torch.rand(num_embeddings, weight_dim)) + else: + self.weights = nn.Parameter(torch.ones(num_embeddings, weight_dim)) + + self.weight_norm = nn.LayerNorm(weight_dim) + self.dropout = nn.Dropout(dropout) + + # L2正则化 + self.l2_reg = 1e-5 + + def forward(self, input): + embedded = self.embedding(input) # [batch, 256, 256, embedding_dim] + + # 获取权重,并进行归一化 + weight = self.weights[input] # [batch, 256, 256, weight_dim] + weight = self.weight_norm(weight) + weight = F.softmax(weight, dim=-1) + + # 对嵌入向量进行加权 + if self.weight_dim == 1: + weighted_embedded = embedded * weight # [batch, 256, 256, embedding_dim] + else: + weighted_embedded = embedded * weight.unsqueeze(-1) + + weighted_embedded = self.dropout(weighted_embedded) + + return weighted_embedded + + def get_l2_reg(self): + return self.l2_reg * (self.weights ** 2).sum() +class AttentionWeightedEmbedding(nn.Module): + def __init__(self, num_embeddings, embedding_dim): + super(AttentionWeightedEmbedding, self).__init__() + self.embedding = nn.Embedding(num_embeddings, embedding_dim) + self.query = nn.Parameter(torch.randn(embedding_dim)) # 可训练的查询向量 + self.softmax = nn.Softmax(dim=-1) + + def forward(self, input): + # 获取嵌入向量 + embedded = self.embedding(input) # Shape: [batch_size, sequence_length, embedding_dim] + + # 计算注意力得分 + attn_scores = torch.matmul(embedded, self.query) # Shape: [batch_size, sequence_length] + + # 归一化注意力得分以得到权重 + attn_weights = self.softmax(attn_scores).unsqueeze(-1) # Shape: [batch_size, sequence_length, 1] + + # 对嵌入向量应用权重 + weighted_embedded = embedded * attn_weights # Shape: [batch_size, sequence_length, embedding_dim] + + return weighted_embedded +class WeightedEmbedding(nn.Module): + def __init__(self, num_embeddings, embedding_dim): + super(WeightedEmbedding, self).__init__() + self.embedding = nn.Embedding(num_embeddings, embedding_dim) + # 可学习的权重矩阵 [num_embeddings, 1] + self.weights = nn.Parameter(torch.ones(num_embeddings, 1)) + + def forward(self, input): + embedded = self.embedding(input) # [batch, 256, 256, embedding_dim] + # 获取权重,并扩展维度以便进行广播运算 + weight = self.weights[input] # [batch, 256, 256, 1] + # 对嵌入向量进行按元素乘法 + weighted_embedded = embedded * weight # [batch, 256, 256, embedding_dim] + return weighted_embedded + +class MapEncoderSingle(BaseModel): + default_conf = { + "embedding_dim": "???", + "output_dim": None, + "num_classes": "???", + "backbone": "???", + "unary_prior": False, + "weighted_embedding": False + } + + def _init(self, conf): + if conf.weighted_embedding==False: + self.embeddings = torch.nn.ModuleDict( + { + k: torch.nn.Embedding(n + 1, conf.embedding_dim) + for k, n in conf.num_classes.items() + } + ) + else: + if conf.weighted_embedding=="AttentionWeightedEmbedding": + self.embeddings = 
torch.nn.ModuleDict( + { + k: AttentionWeightedEmbedding(n + 1, conf.embedding_dim) + for k, n in conf.num_classes.items() + } + ) + elif conf.weighted_embedding=="WeightedEmbedding": + self.embeddings = torch.nn.ModuleDict( + { + k: WeightedEmbedding(n + 1, conf.embedding_dim) + for k, n in conf.num_classes.items() + } + ) + elif conf.weighted_embedding=="ImprovedAttentionEmbedding": + self.embeddings = torch.nn.ModuleDict( + { + k: ImprovedAttentionEmbedding(n + 1, conf.embedding_dim) + for k, n in conf.num_classes.items() + } + ) + else: + pass + + + #num_calsses:{'areas': 7, 'ways': 10, 'nodes': 33} + input_dim = len(conf.num_classes) * conf.embedding_dim + output_dim = conf.output_dim + if output_dim is None: + output_dim = conf.backbone.output_dim + if conf.unary_prior: + output_dim += 1 + if conf.backbone is None: + self.encoder = nn.Conv2d(input_dim, output_dim, 1) + elif conf.backbone == "simple": + self.encoder = nn.Sequential( + nn.Conv2d(input_dim, 128, 3, padding=1), + nn.ReLU(inplace=True), + nn.Conv2d(128, 128, 3, padding=1), + nn.ReLU(inplace=True), + nn.Conv2d(128, output_dim, 3, padding=1), + ) + else: + self.encoder = FeatureExtractor( + { + **conf.backbone, + "input_dim": input_dim, + "output_dim": output_dim, + } + ) + def batch_process(self,input_tensor): + # 获取输入张量的维度 + batch_size, dim1, dim2, dim3 = input_tensor.shape + + # 首先,我们需要对第一个索引为0的二维数组中的非零元素增加43 + input_tensor[:, 0, :, :] += torch.where(input_tensor[:, 0, :, :] != 0, 43, 0) + + # 接着,对第一个索引为1的二维数组中的非零元素增加33 + input_tensor[:, 1, :, :] += torch.where(input_tensor[:, 1, :, :] != 0, 33, 0) + + # 创建一个全零的输出张量 + output_tensor = torch.zeros((batch_size, dim2, dim3), dtype=input_tensor.dtype, device=input_tensor.device) + + # 找到输入张量中至少有一个非零值的位置 + nonzero_mask = torch.any(input_tensor != 0, dim=1) + + # 根据优先级赋值 + output_tensor[nonzero_mask] = input_tensor[:, 2, :, :][nonzero_mask] + output_tensor[nonzero_mask] = torch.where(input_tensor[:, 2, :, :][nonzero_mask] == 0, input_tensor[:, 1, :, :][nonzero_mask], output_tensor[nonzero_mask]) + output_tensor[nonzero_mask] = torch.where(torch.logical_and(input_tensor[:, 2, :, :][nonzero_mask] == 0, input_tensor[:, 1, :, :][nonzero_mask] == 0), input_tensor[:, 0, :, :][nonzero_mask], output_tensor[nonzero_mask]) + + return output_tensor + def _forward(self, data): + temp=data["map"] + temp=self.batch_process(temp) + # a=self.embeddings["all"] + # print("temp",temp.shape,data["map"].shape) + + # 找到tensor中的最大值 + # max_value = temp.max() + + # print("最大值是:", max_value.item()) + embeddings = self.embeddings["all"](temp)#shape:[batch,256,256,48] + + # print("embeddings.shape A",embeddings.shape) + embeddings =embeddings.permute(0, 3, 1, 2) + # print("embeddings.shape B",embeddings.shape) + # print("Single",embeddings.shape) + pass + if isinstance(self.encoder, BaseModel): + # print("encoder is BaseModel") + features = self.encoder({"image": embeddings})["feature_maps"] + else: + # print("encoder is not BaseModel") + features = [self.encoder(embeddings)] + pred = {} + if self.conf.unary_prior: + pred["log_prior"] = [f[:, -1] for f in features] + features = [f[:, :-1] for f in features] + pred["map_features"] = features#6,8,256,256 list of tensor ,shape:[6,8, 256, 256] + + return pred diff --git a/models/maplocnet.py b/models/maplocnet.py new file mode 100644 index 0000000000000000000000000000000000000000..23084359f5a51a8ba995ec8124a8588840b39adf --- /dev/null +++ b/models/maplocnet.py @@ -0,0 +1,260 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. 
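+# MapLocNet: encodes the aerial image and the rasterised OSM tile into feature
+# maps of matching channel depth, rotates the image features into a bank of
+# oriented templates, and exhaustively correlates them against the map features
+# to obtain a score volume over position and heading (u, v, yaw). loss()
+# minimises the negative log-likelihood of the ground-truth pose over this
+# volume.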
+ +import numpy as np +import torch +from torch.nn.functional import normalize + +from . import get_model +from models.base import BaseModel +from models.bev_net import BEVNet +from models.bev_projection import CartesianProjection, PolarProjectionDepth +from models.voting import ( + argmax_xyr,argmax_xyrh, + conv2d_fft_batchwise, + expectation_xyr, + log_softmax_spatial, + mask_yaw_prior, + nll_loss_xyr, + nll_loss_xyr_smoothed, + TemplateSampler, + UAVTemplateSampler, + UAVTemplateSamplerFast +) +import torch.nn.functional as F +from torch.nn.functional import grid_sample, log_softmax, pad +from .map_encoder import MapEncoder +from .map_encoder_single import MapEncoderSingle +from .metrics import AngleError, AngleRecall, Location2DError, Location2DRecall + + +class MapLocNet(BaseModel): + default_conf = { + "image_size": "???", + "val_citys":"???", + "image_encoder": "???", + "map_encoder": "???", + "bev_net": "???", + "latent_dim": "???", + "matching_dim": "???", + "scale_range": [0, 9], + "num_scale_bins": "???", + "z_min": None, + "z_max": "???", + "x_max": "???", + "pixel_per_meter": "???", + "num_rotations": "???", + "add_temperature": False, + "normalize_features": False, + "padding_matching": "replicate", + "apply_map_prior": True, + "do_label_smoothing": False, + "sigma_xy": 1, + "sigma_r": 2, + # depcreated + "depth_parameterization": "scale", + "norm_depth_scores": False, + "normalize_scores_by_dim": False, + "normalize_scores_by_num_valid": True, + "prior_renorm": True, + "retrieval_dim": None, + } + + def _init(self, conf): + assert not self.conf.norm_depth_scores + assert self.conf.depth_parameterization == "scale" + assert not self.conf.normalize_scores_by_dim + assert self.conf.normalize_scores_by_num_valid + assert self.conf.prior_renorm + # a=conf.image_encoder.get("name", "feature_extractor_v2") + # b=conf.image_encoder.get("name") + Encoder = get_model(conf.image_encoder.get("name")) + self.image_encoder = Encoder(conf.image_encoder.backbone) + + if len(conf.map_encoder.num_classes)==1: + self.map_encoder = MapEncoderSingle(conf.map_encoder) + else: + self.map_encoder = MapEncoder(conf.map_encoder) + # self.bev_net = None if conf.bev_net is None else BEVNet(conf.bev_net) + + ppm = conf.pixel_per_meter + # self.projection_polar = PolarProjectionDepth( + # conf.z_max, + # ppm, + # conf.scale_range, + # conf.z_min, + # ) + # self.projection_bev = CartesianProjection( + # conf.z_max, conf.x_max, ppm, conf.z_min + # ) + # self.template_sampler = TemplateSampler( + # self.projection_bev.grid_xz, ppm, conf.num_rotations + # ) + self.template_sampler = UAVTemplateSamplerFast(conf.num_rotations,w=conf.image_size//2) + # self.template_sampler = UAVTemplateSampler(conf.num_rotations) + # self.scale_classifier = torch.nn.Linear(conf.latent_dim, conf.num_scale_bins) + # if conf.bev_net is None: + # self.feature_projection = torch.nn.Linear( + # conf.latent_dim, conf.matching_dim + # ) + if conf.add_temperature: + temperature = torch.nn.Parameter(torch.tensor(0.0)) + self.register_parameter("temperature", temperature) + + def exhaustive_voting(self, f_bev, f_map): + if self.conf.normalize_features: + f_bev = normalize(f_bev, dim=1) + f_map = normalize(f_map, dim=1) + + # Build the templates and exhaustively match against the map. 
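+        # Rough shape of the matching below (sizes are illustrative): f_map is
+        # resampled to the matching resolution, the template sampler produces a
+        # [B, N_rot, C, h, w] bank of rotated copies of the image features, and
+        # conv2d_fft_batchwise correlates every template with the map features
+        # in the Fourier domain, yielding scores of shape [B, N_rot, H, W] where
+        # scores[b, r, i, j] rates the image rotated by the r-th angle placed at
+        # map location (i, j).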
+ # if confidence_bev is not None: + # f_bev = f_bev * confidence_bev.unsqueeze(1) + # f_bev = f_bev.masked_fill(~valid_bev.unsqueeze(1), 0.0) + # torch.save(f_bev, 'f_bev.pt') + # torch.save(f_map, 'f_map.pt') + f_map = F.interpolate(f_map, size=(256, 256), mode='bilinear', align_corners=False) + + templates = self.template_sampler(f_bev)#[batch,256,8,129,129] + # torch.save(templates, 'templates.pt') + with torch.autocast("cuda", enabled=False): + scores = conv2d_fft_batchwise( + f_map.float(), + templates.float(), + padding_mode=self.conf.padding_matching, + ) + if self.conf.add_temperature: + scores = scores * torch.exp(self.temperature) + + # Reweight the different rotations based on the number of valid pixels + # in each template. Axis-aligned rotation have the maximum number of valid pixels. + # valid_templates = self.template_sampler(valid_bev.float()[None]) > (1 - 1e-4) + # num_valid = valid_templates.float().sum((-3, -2, -1)) + # scores = scores / num_valid[..., None, None] + return scores + + def _forward(self, data): + pred = {} + pred_map = pred["map"] = self.map_encoder(data) + f_map = pred_map["map_features"][0]#[batch,8,256,256] + + # Extract image features. + level = 0 + f_image = self.image_encoder(data)["feature_maps"][level]#[batch,128,128,176] + # print("f_map:",f_map.shape) + + scores = self.exhaustive_voting(f_image, f_map)#f_bev:[batch,8,64,129] f_map:[batch,8,256,256] confidence:[1,64,129] + scores = scores.moveaxis(1, -1) # B,H,W,N + if "log_prior" in pred_map and self.conf.apply_map_prior: + scores = scores + pred_map["log_prior"][0].unsqueeze(-1) + # pred["scores_unmasked"] = scores.clone() + if "map_mask" in data: + scores.masked_fill_(~data["map_mask"][..., None], -np.inf) + if "yaw_prior" in data: + mask_yaw_prior(scores, data["yaw_prior"], self.conf.num_rotations) + log_probs = log_softmax_spatial(scores) + # torch.save(scores, 'scores.pt') + with torch.no_grad(): + uvr_max = argmax_xyr(scores).to(scores) + uvr_avg, _ = expectation_xyr(log_probs.exp()) + + return { + **pred, + "scores": scores, + "log_probs": log_probs, + "uvr_max": uvr_max, + "uv_max": uvr_max[..., :2], + "yaw_max": uvr_max[..., 2], + "uvr_expectation": uvr_avg, + "uv_expectation": uvr_avg[..., :2], + "yaw_expectation": uvr_avg[..., 2], + "features_image": f_image, + } + def _forward_scale(self, data,resize=None): + pred = {} + pred_map = pred["map"] = self.map_encoder(data) + f_map = pred_map["map_features"][0]#[batch,8,256,256] + + # Extract image features. 
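+        # This method repeats the matching for several resized copies of the
+        # image features (the sizes in `resize`, supplied by the caller) to
+        # search over an unknown ground resolution. The per-size score volumes
+        # are stacked along a new last dimension and normalised jointly over
+        # location, rotation and scale, and argmax_xyrh returns the best
+        # (u, v, yaw, scale-index).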
+ level = 0 + f_image = self.image_encoder(data)["feature_maps"][level]#[batch,128,128,176] + # print("f_map:",f_map.shape) + scores_list = [] + + for resize_size in resize: + f_image_re = torch.nn.functional.interpolate(f_image, size=resize_size, mode='bilinear', align_corners=False) + scores = self.exhaustive_voting(f_image_re, f_map)#f_bev:[batch,8,64,129] f_map:[batch,8,256,256] confidence:[1,64,129] + scores = scores.moveaxis(1, -1) # B,H,W,N + scores_list.append(scores) + scores_list = torch.stack(scores_list, dim=-1) + log_probs_list = log_softmax(scores_list.flatten(-4), dim=-1).reshape(scores_list.shape) + + + # if "log_prior" in pred_map and self.conf.apply_map_prior: + # scores = scores + pred_map["log_prior"][0].unsqueeze(-1) + # # pred["scores_unmasked"] = scores.clone() + # if "map_mask" in data: + # scores.masked_fill_(~data["map_mask"][..., None], -np.inf) + # if "yaw_prior" in data: + # mask_yaw_prior(scores, data["yaw_prior"], self.conf.num_rotations) + #scores shape:[batch,W,H,64] + + # log_probs = log_softmax_spatial(scores) + # torch.save(scores, 'scores.pt') + with torch.no_grad(): + uvr_max = argmax_xyrh(scores_list) + # uvr_avg, _ = expectation_xyr(log_probs_list.exp()) + uvr_avg= uvr_max + + return { + **pred, + "scores": scores, + "log_probs": log_probs_list, + "uvr_max": uvr_max, + "uv_max": uvr_max[..., :2], + "yaw_max": uvr_max[..., 2], + "uvr_expectation": uvr_avg, + "uv_expectation": uvr_avg[..., :2], + "yaw_expectation": uvr_avg[..., 2], + "features_image": f_image, + } + def loss(self, pred, data): + xy_gt = data["uv"] + yaw_gt = data["roll_pitch_yaw"][..., -1] + if self.conf.do_label_smoothing: + nll = nll_loss_xyr_smoothed( + pred["log_probs"], + xy_gt, + yaw_gt, + self.conf.sigma_xy / self.conf.pixel_per_meter, + self.conf.sigma_r, + mask=data.get("map_mask"), + ) + else: + nll = nll_loss_xyr(pred["log_probs"], xy_gt, yaw_gt) + loss = {"total": nll, "nll": nll} + if self.training and self.conf.add_temperature: + loss["temperature"] = self.temperature.expand(len(nll)) + return loss + + def metrics(self): + return { + "xy_max_error": Location2DError("uv_max", self.conf.pixel_per_meter), + "xy_expectation_error": Location2DError( + "uv_expectation", self.conf.pixel_per_meter + ), + "yaw_max_error": AngleError("yaw_max"), + "xy_recall_1m": Location2DRecall(1.0, self.conf.pixel_per_meter, "uv_max"), + "xy_recall_3m": Location2DRecall(3.0, self.conf.pixel_per_meter, "uv_max"), + "xy_recall_5m": Location2DRecall(5.0, self.conf.pixel_per_meter, "uv_max"), + + # "x_recall_1m": Location2DRecall(1.0, self.conf.pixel_per_meter, "uv_max"), + # "x_recall_3m": Location2DRecall(3.0, self.conf.pixel_per_meter, "uv_max"), + # "x_recall_5m": Location2DRecall(5.0, self.conf.pixel_per_meter, "uv_max"), + # + # "y_recall_1m": Location2DRecall(1.0, self.conf.pixel_per_meter, "uv_max"), + # "y_recall_3m": Location2DRecall(3.0, self.conf.pixel_per_meter, "uv_max"), + # "y_recall_5m": Location2DRecall(5.0, self.conf.pixel_per_meter, "uv_max"), + + "yaw_recall_1°": AngleRecall(1.0, "yaw_max"), + "yaw_recall_3°": AngleRecall(3.0, "yaw_max"), + "yaw_recall_5°": AngleRecall(5.0, "yaw_max"), + } diff --git a/models/metrics.py b/models/metrics.py new file mode 100644 index 0000000000000000000000000000000000000000..b50a724f4719853b476d693db25ddbba562a3a51 --- /dev/null +++ b/models/metrics.py @@ -0,0 +1,118 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. 
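+# Evaluation metrics. Position errors are computed in pixels and divided by
+# pixel_per_meter to report metres; angle errors are wrapped so they lie in
+# [0, 180] degrees. The *Recall metrics accumulate the fraction of samples
+# whose error is below a fixed threshold (e.g. 1, 3 or 5 metres / degrees).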
+ +import torch +import torchmetrics +from torchmetrics.utilities.data import dim_zero_cat + +from .utils import deg2rad, rotmat2d + + +def location_error(uv, uv_gt, ppm=1): + return torch.norm(uv - uv_gt.to(uv), dim=-1) / ppm + +def location_error_single(uv, uv_gt, ppm=1): + return torch.norm(uv - uv_gt.to(uv), dim=-1) / ppm + +def angle_error(t, t_gt): + error = torch.abs(t % 360 - t_gt.to(t) % 360) + error = torch.minimum(error, 360 - error) + return error + + +class Location2DRecall(torchmetrics.MeanMetric): + def __init__(self, threshold, pixel_per_meter, key="uv_max", *args, **kwargs): + self.threshold = threshold + self.ppm = pixel_per_meter + self.key = key + super().__init__(*args, **kwargs) + + def update(self, pred, data): + self.cuda() + error = location_error(pred[self.key], data["uv"], self.ppm) + # print(error,self.threshold) + super().update((error <= torch.tensor(self.threshold,device=error.device)).float()) + +class Location1DRecall(torchmetrics.MeanMetric): + def __init__(self, threshold, pixel_per_meter, key="uv_max", *args, **kwargs): + self.threshold = threshold + self.ppm = pixel_per_meter + self.key = key + super().__init__(*args, **kwargs) + + def update(self, pred, data): + self.cuda() + error = location_error(pred[self.key], data["uv"], self.ppm) + # print(error,self.threshold) + super().update((error <= torch.tensor(self.threshold,device=error.device)).float()) +class AngleRecall(torchmetrics.MeanMetric): + def __init__(self, threshold, key="yaw_max", *args, **kwargs): + self.threshold = threshold + self.key = key + + super().__init__(*args, **kwargs) + + def update(self, pred, data): + self.cuda() + error = angle_error(pred[self.key], data["roll_pitch_yaw"][..., -1]) + super().update((error <= self.threshold).float()) + + +class MeanMetricWithRecall(torchmetrics.Metric): + full_state_update = True + + def __init__(self): + super().__init__() + self.add_state("value", default=[], dist_reduce_fx="cat") + def compute(self): + return dim_zero_cat(self.value).mean(0) + + def get_errors(self): + return dim_zero_cat(self.value) + + def recall(self, thresholds): + self.cuda() + error = self.get_errors() + thresholds = error.new_tensor(thresholds) + return (error.unsqueeze(-1) < thresholds).float().mean(0) * 100 + + +class AngleError(MeanMetricWithRecall): + def __init__(self, key): + super().__init__() + self.key = key + + def update(self, pred, data): + self.cuda() + value = angle_error(pred[self.key], data["roll_pitch_yaw"][..., -1]) + if value.numel(): + self.value.append(value) + + +class Location2DError(MeanMetricWithRecall): + def __init__(self, key, pixel_per_meter): + super().__init__() + self.key = key + self.ppm = pixel_per_meter + + def update(self, pred, data): + self.cuda() + value = location_error(pred[self.key], data["uv"], self.ppm) + if value.numel(): + self.value.append(value) + + +class LateralLongitudinalError(MeanMetricWithRecall): + def __init__(self, pixel_per_meter, key="uv_max"): + super().__init__() + self.ppm = pixel_per_meter + self.key = key + + def update(self, pred, data): + self.cuda() + yaw = deg2rad(data["roll_pitch_yaw"][..., -1]) + shift = (pred[self.key] - data["uv"]) * yaw.new_tensor([-1, 1]) + shift = (rotmat2d(yaw) @ shift.unsqueeze(-1)).squeeze(-1) + error = torch.abs(shift) / self.ppm + value = error.view(-1, 2) + if value.numel(): + self.value.append(value) diff --git a/models/orienternet.py b/models/orienternet.py new file mode 100644 index 0000000000000000000000000000000000000000..9315f10afedc4d6cc49e4ba78e82d7ae8d510795 --- 
/dev/null +++ b/models/orienternet.py @@ -0,0 +1,204 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. + +import numpy as np +import torch +from torch.nn.functional import normalize + +from . import get_model +from models.base import BaseModel +from models.bev_net import BEVNet +from models.bev_projection import CartesianProjection, PolarProjectionDepth +from models.voting import ( + argmax_xyr, + conv2d_fft_batchwise, + expectation_xyr, + log_softmax_spatial, + mask_yaw_prior, + nll_loss_xyr, + nll_loss_xyr_smoothed, + TemplateSampler, + UAVTemplateSampler, + UAVTemplateSamplerFast +) +from .map_encoder import MapEncoder +from .metrics import AngleError, AngleRecall, Location2DError, Location2DRecall + + +class MapLocNet(BaseModel): + default_conf = { + "image_size": "???", + "val_citys":"???", + "image_encoder": "???", + "map_encoder": "???", + "bev_net": "???", + "latent_dim": "???", + "matching_dim": "???", + "scale_range": [0, 9], + "num_scale_bins": "???", + "z_min": None, + "z_max": "???", + "x_max": "???", + "pixel_per_meter": "???", + "num_rotations": "???", + "add_temperature": False, + "normalize_features": False, + "padding_matching": "replicate", + "apply_map_prior": True, + "do_label_smoothing": False, + "sigma_xy": 1, + "sigma_r": 2, + # depcreated + "depth_parameterization": "scale", + "norm_depth_scores": False, + "normalize_scores_by_dim": False, + "normalize_scores_by_num_valid": True, + "prior_renorm": True, + "retrieval_dim": None, + } + + def _init(self, conf): + assert not self.conf.norm_depth_scores + assert self.conf.depth_parameterization == "scale" + assert not self.conf.normalize_scores_by_dim + assert self.conf.normalize_scores_by_num_valid + assert self.conf.prior_renorm + + Encoder = get_model(conf.image_encoder.get("name", "feature_extractor_v2")) + self.image_encoder = Encoder(conf.image_encoder.backbone) + self.map_encoder = MapEncoder(conf.map_encoder) + # self.bev_net = None if conf.bev_net is None else BEVNet(conf.bev_net) + + ppm = conf.pixel_per_meter + # self.projection_polar = PolarProjectionDepth( + # conf.z_max, + # ppm, + # conf.scale_range, + # conf.z_min, + # ) + # self.projection_bev = CartesianProjection( + # conf.z_max, conf.x_max, ppm, conf.z_min + # ) + # self.template_sampler = TemplateSampler( + # self.projection_bev.grid_xz, ppm, conf.num_rotations + # ) + self.template_sampler = UAVTemplateSamplerFast(conf.num_rotations,w=conf.image_size//2) + # self.template_sampler = UAVTemplateSampler(conf.num_rotations) + # self.scale_classifier = torch.nn.Linear(conf.latent_dim, conf.num_scale_bins) + # if conf.bev_net is None: + # self.feature_projection = torch.nn.Linear( + # conf.latent_dim, conf.matching_dim + # ) + if conf.add_temperature: + temperature = torch.nn.Parameter(torch.tensor(0.0)) + self.register_parameter("temperature", temperature) + + def exhaustive_voting(self, f_bev, f_map): + if self.conf.normalize_features: + f_bev = normalize(f_bev, dim=1) + f_map = normalize(f_map, dim=1) + + # Build the templates and exhaustively match against the map. 
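+        # The correlation is done in the Fourier domain (one rFFT of the map
+        # features and of the zero-padded templates, a per-channel product, and
+        # an inverse rFFT), which avoids sliding every rotated template over the
+        # map with an explicit spatial convolution.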
+ # if confidence_bev is not None: + # f_bev = f_bev * confidence_bev.unsqueeze(1) + # f_bev = f_bev.masked_fill(~valid_bev.unsqueeze(1), 0.0) + # torch.save(f_bev, 'f_bev.pt') + # torch.save(f_map, 'f_map.pt') + + templates = self.template_sampler(f_bev)#[batch,256,8,129,129] + # torch.save(templates, 'templates.pt') + with torch.autocast("cuda", enabled=False): + scores = conv2d_fft_batchwise( + f_map.float(), + templates.float(), + padding_mode=self.conf.padding_matching, + ) + if self.conf.add_temperature: + scores = scores * torch.exp(self.temperature) + + # Reweight the different rotations based on the number of valid pixels + # in each template. Axis-aligned rotation have the maximum number of valid pixels. + # valid_templates = self.template_sampler(valid_bev.float()[None]) > (1 - 1e-4) + # num_valid = valid_templates.float().sum((-3, -2, -1)) + # scores = scores / num_valid[..., None, None] + return scores + + def _forward(self, data): + pred = {} + pred_map = pred["map"] = self.map_encoder(data) + f_map = pred_map["map_features"][0]#[batch,8,256,256] + + # Extract image features. + level = 0 + f_image = self.image_encoder(data)["feature_maps"][level]#[batch,128,128,176] + # print("f_map:",f_map.shape) + + scores = self.exhaustive_voting(f_image, f_map)#f_bev:[batch,8,64,129] f_map:[batch,8,256,256] confidence:[1,64,129] + scores = scores.moveaxis(1, -1) # B,H,W,N + if "log_prior" in pred_map and self.conf.apply_map_prior: + scores = scores + pred_map["log_prior"][0].unsqueeze(-1) + # pred["scores_unmasked"] = scores.clone() + if "map_mask" in data: + scores.masked_fill_(~data["map_mask"][..., None], -np.inf) + if "yaw_prior" in data: + mask_yaw_prior(scores, data["yaw_prior"], self.conf.num_rotations) + log_probs = log_softmax_spatial(scores) + # torch.save(scores, 'scores.pt') + with torch.no_grad(): + uvr_max = argmax_xyr(scores).to(scores) + uvr_avg, _ = expectation_xyr(log_probs.exp()) + + return { + **pred, + "scores": scores, + "log_probs": log_probs, + "uvr_max": uvr_max, + "uv_max": uvr_max[..., :2], + "yaw_max": uvr_max[..., 2], + "uvr_expectation": uvr_avg, + "uv_expectation": uvr_avg[..., :2], + "yaw_expectation": uvr_avg[..., 2], + "features_image": f_image, + } + + def loss(self, pred, data): + xy_gt = data["uv"] + yaw_gt = data["roll_pitch_yaw"][..., -1] + if self.conf.do_label_smoothing: + nll = nll_loss_xyr_smoothed( + pred["log_probs"], + xy_gt, + yaw_gt, + self.conf.sigma_xy / self.conf.pixel_per_meter, + self.conf.sigma_r, + mask=data.get("map_mask"), + ) + else: + nll = nll_loss_xyr(pred["log_probs"], xy_gt, yaw_gt) + loss = {"total": nll, "nll": nll} + if self.training and self.conf.add_temperature: + loss["temperature"] = self.temperature.expand(len(nll)) + return loss + + def metrics(self): + return { + "xy_max_error": Location2DError("uv_max", self.conf.pixel_per_meter), + "xy_expectation_error": Location2DError( + "uv_expectation", self.conf.pixel_per_meter + ), + "yaw_max_error": AngleError("yaw_max"), + "xy_recall_1m": Location2DRecall(1.0, self.conf.pixel_per_meter, "uv_max"), + "xy_recall_3m": Location2DRecall(3.0, self.conf.pixel_per_meter, "uv_max"), + "xy_recall_5m": Location2DRecall(5.0, self.conf.pixel_per_meter, "uv_max"), + + # "x_recall_1m": Location2DRecall(1.0, self.conf.pixel_per_meter, "uv_max"), + # "x_recall_3m": Location2DRecall(3.0, self.conf.pixel_per_meter, "uv_max"), + # "x_recall_5m": Location2DRecall(5.0, self.conf.pixel_per_meter, "uv_max"), + # + # "y_recall_1m": Location2DRecall(1.0, self.conf.pixel_per_meter, "uv_max"), + # 
"y_recall_3m": Location2DRecall(3.0, self.conf.pixel_per_meter, "uv_max"), + # "y_recall_5m": Location2DRecall(5.0, self.conf.pixel_per_meter, "uv_max"), + + "yaw_recall_1°": AngleRecall(1.0, "yaw_max"), + "yaw_recall_3°": AngleRecall(3.0, "yaw_max"), + "yaw_recall_5°": AngleRecall(5.0, "yaw_max"), + } diff --git a/models/sequential.py b/models/sequential.py new file mode 100644 index 0000000000000000000000000000000000000000..606be3fe7a1861c98c178f446ed8774a4755ce77 --- /dev/null +++ b/models/sequential.py @@ -0,0 +1,218 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. + +import numpy as np +import torch + +from .voting import argmax_xyr, log_softmax_spatial, sample_xyr +from .utils import deg2rad, make_grid, rotmat2d + + +def log_gaussian(points, mean, sigma): + return -1 / 2 * torch.sum((points - mean) ** 2, -1) / sigma**2 + + +def log_laplace(points, mean, sigma): + return -torch.sum(torch.abs(points - mean), -1) / sigma + + +def propagate_belief( + Δ_xy, Δ_yaw, canvas_target, canvas_source, belief, num_rotations=None +): + # We allow a different sampling resolution in the target frame + if num_rotations is None: + num_rotations = belief.shape[-1] + + angles = torch.arange( + 0, 360, 360 / num_rotations, device=Δ_xy.device, dtype=Δ_xy.dtype + ) + uv_grid = make_grid(canvas_target.w, canvas_target.h, device=Δ_xy.device) + xy_grid = canvas_target.to_xy(uv_grid.to(Δ_xy)) + + Δ_xy_world = torch.einsum("nij,j->ni", rotmat2d(deg2rad(-angles)), Δ_xy) + xy_grid_prev = xy_grid[..., None, :] + Δ_xy_world[..., None, None, :, :] + uv_grid_prev = canvas_source.to_uv(xy_grid_prev).to(Δ_xy) + + angles_prev = angles + Δ_yaw + angles_grid_prev = angles_prev.tile((canvas_target.h, canvas_target.w, 1)) + + prior, valid = sample_xyr( + belief[None, None], + uv_grid_prev.to(belief)[None], + angles_grid_prev.to(belief)[None], + nearest_for_inf=True, + ) + return prior, valid + + +def markov_filtering(observations, canvas, xys, yaws, idxs=None): + assert len(observations) == len(canvas) == len(xys) == len(yaws) + if idxs is None: + idxs = range(len(observations)) + belief = None + beliefs = [] + for i in idxs: + obs = observations[i] + if belief is None: + belief = obs + else: + Δ_xy = rotmat2d(deg2rad(yaws[i])) @ (xys[i - 1] - xys[i]) + Δ_yaw = yaws[i - 1] - yaws[i] + prior, valid = propagate_belief( + Δ_xy, Δ_yaw, canvas[i], canvas[i - 1], belief + ) + prior = prior[0, 0].masked_fill_(~valid[0], -np.inf) + belief = prior + obs + belief = log_softmax_spatial(belief) + beliefs.append(belief) + uvt_seq = torch.stack([argmax_xyr(p) for p in beliefs]) + return beliefs, uvt_seq + + +def integrate_observation( + source, + target, + xy_source, + xy_target, + yaw_source, + yaw_target, + canvas_source, + canvas_target, + **kwargs +): + Δ_xy = rotmat2d(deg2rad(yaw_target)) @ (xy_source - xy_target) + Δ_yaw = yaw_source - yaw_target + prior, valid = propagate_belief( + Δ_xy, Δ_yaw, canvas_target, canvas_source, source, **kwargs + ) + prior = prior[0, 0].masked_fill_(~valid[0], -np.inf) + target.add_(prior) + target.sub_(target.max()) # normalize to avoid overflow + return prior + + +class RigidAligner: + def __init__( + self, + canvas_ref=None, + xy_ref=None, + yaw_ref=None, + num_rotations=None, + track_priors=False, + ): + self.canvas = canvas_ref + self.xy_ref = xy_ref + self.yaw_ref = yaw_ref + self.rotmat_ref = None + self.num_rotations = num_rotations + self.belief = None + self.priors = [] if track_priors else None + + self.yaw_slam2geo = None + self.Rt_slam2geo = None + + def update(self, observation, 
canvas, xy, yaw): + # initialization + if self.canvas is None: + self.canvas = canvas + if self.xy_ref is None: + self.xy_ref = xy + self.yaw_ref = yaw + self.rotmat_ref = rotmat2d(deg2rad(self.yaw_ref)) + if self.num_rotations is None: + self.num_rotations = observation.shape[-1] + if self.belief is None: + self.belief = observation.new_zeros( + (self.canvas.h, self.canvas.w, self.num_rotations) + ) + + prior = integrate_observation( + observation, + self.belief, + xy, + self.xy_ref, + yaw, + self.yaw_ref, + canvas, + self.canvas, + num_rotations=self.num_rotations, + ) + + if self.priors is not None: + self.priors.append(prior.cpu()) + return prior + + def update_with_ref(self, observation, canvas, xy, yaw): + if self.belief is not None: + observation = observation.clone() + integrate_observation( + self.belief, + observation, + self.xy_ref, + xy, + self.yaw_ref, + yaw, + self.canvas, + canvas, + num_rotations=observation.shape[-1], + ) + + self.belief = observation + self.canvas = canvas + self.xy_ref = xy + self.yaw_ref = yaw + + def compute(self): + uvt_align_ref = argmax_xyr(self.belief) + self.yaw_ref_align = uvt_align_ref[-1] + self.xy_ref_align = self.canvas.to_xy(uvt_align_ref[:2].double()) + + self.yaw_slam2geo = self.yaw_ref - self.yaw_ref_align + R_slam2geo = rotmat2d(deg2rad(self.yaw_slam2geo)) + t_slam2geo = self.xy_ref_align - R_slam2geo @ self.xy_ref + self.Rt_slam2geo = (R_slam2geo, t_slam2geo) + + def transform(self, xy, yaw): + if self.Rt_slam2geo is None or self.yaw_slam2geo is None: + raise ValueError("Missing transformation, call `compute()` first!") + xy_geo = self.Rt_slam2geo[1].to(xy) + xy @ self.Rt_slam2geo[0].T.to(xy) + return xy_geo, (self.yaw_slam2geo.to(yaw) + yaw) % 360 + + +class GPSAligner(RigidAligner): + def __init__(self, distribution=log_laplace, **kwargs): + self.distribution = distribution + super().__init__(**kwargs) + if self.num_rotations is None: + raise ValueError("Rotation number is required.") + angles = torch.arange(0, 360, 360 / self.num_rotations) + self.rotmats = rotmat2d(deg2rad(-angles)) + self.xy_grid = None + + def update(self, xy_gps, accuracy, canvas, xy, yaw): + # initialization + if self.canvas is None: + self.canvas = canvas + if self.xy_ref is None: + self.xy_ref = xy + self.yaw_ref = yaw + self.rotmat_ref = rotmat2d(deg2rad(self.yaw_ref)) + if self.xy_grid is None: + self.xy_grid = self.canvas.to_xy(make_grid(self.canvas.w, self.canvas.h)) + if self.belief is None: + self.belief = xy_gps.new_zeros( + (self.canvas.h, self.canvas.w, self.num_rotations) + ) + + # integration + Δ_xy = self.rotmat_ref @ (xy - self.xy_ref) + Δ_xy_world = torch.einsum("nij,j->ni", self.rotmats.to(xy), Δ_xy) + xy_grid_prev = ( + self.xy_grid.to(xy)[..., None, :] + Δ_xy_world[..., None, None, :, :] + ) + prior = self.distribution(xy_grid_prev, xy_gps, accuracy) + self.belief.add_(prior) + self.belief.sub_(self.belief.max()) # normalize to avoid overflow + + if self.priors is not None: + self.priors.append(prior.cpu()) + return prior diff --git a/models/utils.py b/models/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..ec246b7d0adf04cc9307475867650523f67a5063 --- /dev/null +++ b/models/utils.py @@ -0,0 +1,87 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. 
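+# Small geometry helpers shared by the models. Conventions (illustrative):
+# rotmat2d(a) is the counter-clockwise rotation [[cos a, -sin a], [sin a, cos a]],
+# so rotmat2d(deg2rad(torch.tensor(90.0))) maps (1, 0) to roughly (0, 1), and
+# make_grid(w, h) returns an (h, w, 2) grid of integer-valued (x, y) coordinates.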
+ +import math +from typing import Optional + +import torch + + +def checkpointed(cls, do=True): + """Adapted from the DISK implementation of Michał Tyszkiewicz.""" + assert issubclass(cls, torch.nn.Module) + + class Checkpointed(cls): + def forward(self, *args, **kwargs): + super_fwd = super(Checkpointed, self).forward + if any((torch.is_tensor(a) and a.requires_grad) for a in args): + return torch.utils.checkpoint.checkpoint(super_fwd, *args, **kwargs) + else: + return super_fwd(*args, **kwargs) + + return Checkpointed if do else cls + + +class GlobalPooling(torch.nn.Module): + def __init__(self, kind): + super().__init__() + if kind == "mean": + self.fn = torch.nn.Sequential( + torch.nn.Flatten(2), torch.nn.AdaptiveAvgPool1d(1), torch.nn.Flatten() + ) + elif kind == "max": + self.fn = torch.nn.Sequential( + torch.nn.Flatten(2), torch.nn.AdaptiveMaxPool1d(1), torch.nn.Flatten() + ) + else: + raise ValueError(f"Unknown pooling type {kind}.") + + def forward(self, x): + return self.fn(x) + + +@torch.jit.script +def make_grid( + w: float, + h: float, + step_x: float = 1.0, + step_y: float = 1.0, + orig_x: float = 0, + orig_y: float = 0, + y_up: bool = False, + device: Optional[torch.device] = None, +) -> torch.Tensor: + x, y = torch.meshgrid( + [ + torch.arange(orig_x, w + orig_x, step_x, device=device), + torch.arange(orig_y, h + orig_y, step_y, device=device), + ], + indexing="xy", + ) + if y_up: + y = y.flip(-2) + grid = torch.stack((x, y), -1) + return grid + + +@torch.jit.script +def rotmat2d(angle: torch.Tensor) -> torch.Tensor: + c = torch.cos(angle) + s = torch.sin(angle) + R = torch.stack([c, -s, s, c], -1).reshape(angle.shape + (2, 2)) + return R + + +@torch.jit.script +def rotmat2d_grad(angle: torch.Tensor) -> torch.Tensor: + c = torch.cos(angle) + s = torch.sin(angle) + R = torch.stack([-s, -c, c, -s], -1).reshape(angle.shape + (2, 2)) + return R + + +def deg2rad(x): + return x * math.pi / 180 + + +def rad2deg(x): + return x * 180 / math.pi diff --git a/models/voting.py b/models/voting.py new file mode 100644 index 0000000000000000000000000000000000000000..5a4295649a80edb5610af8d8abff5993f2ea4c06 --- /dev/null +++ b/models/voting.py @@ -0,0 +1,387 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. 
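+# Pose-voting utilities: template samplers that rotate image features into a
+# bank of oriented kernels, FFT-based batched correlation against the map
+# features (conv2d_fft_batchwise), and helpers to normalise the resulting score
+# volume (log_softmax_spatial), extract the argmax or expectation over
+# (x, y, yaw), and evaluate the NLL of a ground-truth pose.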
+ +from typing import Optional, Tuple + +import numpy as np +import torch +from torch.fft import irfftn, rfftn +from torch.nn.functional import grid_sample, log_softmax, pad + +from .metrics import angle_error +from .utils import make_grid, rotmat2d +from torchvision.transforms.functional import rotate + +class UAVTemplateSamplerFast(torch.nn.Module): + def __init__(self, num_rotations,w=128,optimize=True): + super().__init__() + + h, w = w,w + grid_xy = make_grid( + w=w, + h=h, + step_x=1, + step_y=1, + orig_y=-h//2, + orig_x=-h//2, + y_up=True, + ).cuda() + + if optimize: + assert (num_rotations % 4) == 0 + angles = torch.arange( + 0, 90, 90 / (num_rotations // 4) + ).cuda() + else: + angles = torch.arange( + 0, 360, 360 / num_rotations, device=grid_xz_bev.device + ) + rotmats = rotmat2d(angles / 180 * np.pi) + grid_xy_rot = torch.einsum("...nij,...hwj->...nhwi", rotmats, grid_xy) + + grid_ij_rot = (grid_xy_rot - grid_xy[..., :1, :1, :]) * grid_xy.new_tensor( + [1, -1] + ) + grid_ij_rot = grid_ij_rot + grid_norm = (grid_ij_rot + 0.5) / grid_ij_rot.new_tensor([w, h]) * 2 - 1 + + self.optimize = optimize + self.num_rots = num_rotations + self.register_buffer("angles", angles, persistent=False) + self.register_buffer("grid_norm", grid_norm, persistent=False) + + def forward(self, image_bev): + grid = self.grid_norm + b, c = image_bev.shape[:2] + n, h, w = grid.shape[:3] + grid = grid[None].repeat_interleave(b, 0).reshape(b * n, h, w, 2) + image = ( + image_bev[:, None] + .repeat_interleave(n, 1) + .reshape(b * n, *image_bev.shape[1:]) + ) + # print(image.shape,grid.shape,self.grid_norm.shape) + kernels = grid_sample(image, grid.to(image.dtype), align_corners=False).reshape( + b, n, c, h, w + ) + + if self.optimize: # we have computed only the first quadrant + kernels_quad234 = [torch.rot90(kernels, -i, (-2, -1)) for i in (1, 2, 3)] + kernels = torch.cat([kernels] + kernels_quad234, 1) + + return kernels +class UAVTemplateSampler(torch.nn.Module): + def __init__(self, num_rotations): + super().__init__() + + self.num_rotations = num_rotations + + def Template(self, input_features): + # 角度数量 + num_angles = self.num_rotations + # 扩展第二个维度为旋转角度数量 + input_shape = torch.tensor(input_features.shape) + output_shape = torch.cat((input_shape[:1], torch.tensor([num_angles]), input_shape[1:])).tolist() + expanded_features = torch.zeros(output_shape,device=input_features.device) + + # 生成旋转角度序列 + rotation_angles = torch.linspace(360, 0, 64 + 1)[:-1] + # rotation_angles=torch.flip(rotation_angles, dims=[0]) + # 对扩展后的特征应用不同的旋转角度 + rotated_features = [] + # print(len(rotation_angles)) + for i in range(len(rotation_angles)): + # print(rotation_angles[i].item()) + rotated_feature = rotate(input_features, rotation_angles[i].item(), fill=0) + expanded_features[:, i, :, :, :] = rotated_feature + + # 将所有旋转后的特征堆叠起来形成最终的输出向量 + # output_features = torch.stack(rotated_features, dim=1) + + # 输出向量的维度 + # output_size = [3, num_angles, 8, 128, 128] + return expanded_features # 输出调试信息,验证输出向量的维度是否正确 + def forward(self, image_bev): + + kernels=self.Template(image_bev) + + return kernels +class TemplateSampler(torch.nn.Module): + def __init__(self, grid_xz_bev, ppm, num_rotations, optimize=True): + super().__init__() + + Δ = 1 / ppm + h, w = grid_xz_bev.shape[:2] + ksize = max(w, h * 2 + 1) + radius = ksize * Δ + grid_xy = make_grid( + radius, + radius, + step_x=Δ, + step_y=Δ, + orig_y=(Δ - radius) / 2, + orig_x=(Δ - radius) / 2, + y_up=True, + ) + + if optimize: + assert (num_rotations % 4) == 0 + angles = torch.arange( + 0, 
90, 90 / (num_rotations // 4), device=grid_xz_bev.device + ) + else: + angles = torch.arange( + 0, 360, 360 / num_rotations, device=grid_xz_bev.device + ) + rotmats = rotmat2d(angles / 180 * np.pi) + grid_xy_rot = torch.einsum("...nij,...hwj->...nhwi", rotmats, grid_xy) + + grid_ij_rot = (grid_xy_rot - grid_xz_bev[..., :1, :1, :]) * grid_xy.new_tensor( + [1, -1] + ) + grid_ij_rot = grid_ij_rot / Δ + grid_norm = (grid_ij_rot + 0.5) / grid_ij_rot.new_tensor([w, h]) * 2 - 1 + + self.optimize = optimize + self.num_rots = num_rotations + self.register_buffer("angles", angles, persistent=False) + self.register_buffer("grid_norm", grid_norm, persistent=False) + + def forward(self, image_bev): + grid = self.grid_norm + b, c = image_bev.shape[:2] + n, h, w = grid.shape[:3] + grid = grid[None].repeat_interleave(b, 0).reshape(b * n, h, w, 2) + image = ( + image_bev[:, None] + .repeat_interleave(n, 1) + .reshape(b * n, *image_bev.shape[1:]) + ) + kernels = grid_sample(image, grid.to(image.dtype), align_corners=False).reshape( + b, n, c, h, w + ) + + if self.optimize: # we have computed only the first quadrant + kernels_quad234 = [torch.rot90(kernels, -i, (-2, -1)) for i in (1, 2, 3)] + kernels = torch.cat([kernels] + kernels_quad234, 1) + + return kernels + + +def conv2d_fft_batchwise(signal, kernel, padding="same", padding_mode="constant"): + if padding == "same": + padding = [i // 2 for i in kernel.shape[-2:]] + padding_signal = [p for p in padding[::-1] for _ in range(2)] + signal = pad(signal, padding_signal, mode=padding_mode) + assert signal.size(-1) % 2 == 0 + + padding_kernel = [ + pad for i in [1, 2] for pad in [0, signal.size(-i) - kernel.size(-i)] + ] + kernel_padded = pad(kernel, padding_kernel) + + signal_fr = rfftn(signal, dim=(-1, -2)) + kernel_fr = rfftn(kernel_padded, dim=(-1, -2)) + + kernel_fr.imag *= -1 # flip the kernel + output_fr = torch.einsum("bc...,bdc...->bd...", signal_fr, kernel_fr) + output = irfftn(output_fr, dim=(-1, -2)) + + crop_slices = [slice(0, output.size(0)), slice(0, output.size(1))] + [ + slice(0, (signal.size(i) - kernel.size(i) + 1)) for i in [-2, -1] + ] + output = output[crop_slices].contiguous() + + return output + + +class SparseMapSampler(torch.nn.Module): + def __init__(self, num_rotations): + super().__init__() + angles = torch.arange(0, 360, 360 / self.conf.num_rotations) + rotmats = rotmat2d(angles / 180 * np.pi) + self.num_rotations = num_rotations + self.register_buffer("rotmats", rotmats, persistent=False) + + def forward(self, image_map, p2d_bev): + h, w = image_map.shape[-2:] + locations = make_grid(w, h, device=p2d_bev.device) + p2d_candidates = torch.einsum( + "kji,...i,->...kj", self.rotmats.to(p2d_bev), p2d_bev + ) + p2d_candidates = p2d_candidates[..., None, None, :, :] + locations.unsqueeze(-1) + # ... 
x N x W x H x K x 2 + + p2d_norm = (p2d_candidates / (image_map.new_tensor([w, h]) - 1)) * 2 - 1 + valid = torch.all((p2d_norm >= -1) & (p2d_norm <= 1), -1) + value = grid_sample( + image_map, p2d_norm.flatten(-4, -2), align_corners=True, mode="bilinear" + ) + value = value.reshape(image_map.shape[:2] + valid.shape[-4]) + return valid, value + + +def sample_xyr(volume, xy_grid, angle_grid, nearest_for_inf=False): + # (B, C, H, W, N) to (B, C, H, W, N+1) + volume_padded = pad(volume, [0, 1, 0, 0, 0, 0], mode="circular") + + size = xy_grid.new_tensor(volume.shape[-3:-1][::-1]) + xy_norm = xy_grid / (size - 1) # align_corners=True + angle_norm = (angle_grid / 360) % 1 + grid = torch.concat([angle_norm.unsqueeze(-1), xy_norm], -1) + grid_norm = grid * 2 - 1 + + valid = torch.all((grid_norm >= -1) & (grid_norm <= 1), -1) + value = grid_sample(volume_padded, grid_norm, align_corners=True, mode="bilinear") + + # if one of the values used for linear interpolation is infinite, + # we fallback to nearest to avoid propagating inf + if nearest_for_inf: + value_nearest = grid_sample( + volume_padded, grid_norm, align_corners=True, mode="nearest" + ) + value = torch.where(~torch.isfinite(value) & valid, value_nearest, value) + + return value, valid + + +def nll_loss_xyr(log_probs, xy, angle): + log_prob, _ = sample_xyr( + log_probs.unsqueeze(1), xy[:, None, None, None], angle[:, None, None, None] + ) + nll = -log_prob.reshape(-1) # remove C,H,W,N + return nll + + +def nll_loss_xyr_smoothed(log_probs, xy, angle, sigma_xy, sigma_r, mask=None): + *_, nx, ny, nr = log_probs.shape + grid_x = torch.arange(nx, device=log_probs.device, dtype=torch.float) + dx = (grid_x - xy[..., None, 0]) / sigma_xy + grid_y = torch.arange(ny, device=log_probs.device, dtype=torch.float) + dy = (grid_y - xy[..., None, 1]) / sigma_xy + dr = ( + torch.arange(0, 360, 360 / nr, device=log_probs.device, dtype=torch.float) + - angle[..., None] + ) % 360 + dr = torch.minimum(dr, 360 - dr) / sigma_r + diff = ( + dx[..., None, :, None] ** 2 + + dy[..., :, None, None] ** 2 + + dr[..., None, None, :] ** 2 + ) + pdf = torch.exp(-diff / 2) + if mask is not None: + pdf.masked_fill_(~mask[..., None], 0) + log_probs = log_probs.masked_fill(~mask[..., None], 0) + pdf /= pdf.sum((-1, -2, -3), keepdim=True) + return -torch.sum(pdf * log_probs.to(torch.float), dim=(-1, -2, -3)) + + +def log_softmax_spatial(x, dims=3): + return log_softmax(x.flatten(-dims), dim=-1).reshape(x.shape) + + +@torch.jit.script +def argmax_xy(scores: torch.Tensor) -> torch.Tensor: + indices = scores.flatten(-2).max(-1).indices + width = scores.shape[-1] + x = indices % width + y = torch.div(indices, width, rounding_mode="floor") + return torch.stack((x, y), -1) + + +@torch.jit.script +def expectation_xy(prob: torch.Tensor) -> torch.Tensor: + h, w = prob.shape[-2:] + grid = make_grid(float(w), float(h), device=prob.device).to(prob) + return torch.einsum("...hw,hwd->...d", prob, grid) + + +@torch.jit.script +def expectation_xyr( + prob: torch.Tensor, covariance: bool = False +) -> Tuple[torch.Tensor, Optional[torch.Tensor]]: + h, w, num_rotations = prob.shape[-3:] + x, y = torch.meshgrid( + [ + torch.arange(w, device=prob.device, dtype=prob.dtype), + torch.arange(h, device=prob.device, dtype=prob.dtype), + ], + indexing="xy", + ) + grid_xy = torch.stack((x, y), -1) + xy_mean = torch.einsum("...hwn,hwd->...d", prob, grid_xy) + + angles = torch.arange(0, 1, 1 / num_rotations, device=prob.device, dtype=prob.dtype) + angles = angles * 2 * np.pi + grid_cs = 
torch.stack([torch.cos(angles), torch.sin(angles)], -1)
+    cs_mean = torch.einsum("...hwn,nd->...d", prob, grid_cs)
+    angle = torch.atan2(cs_mean[..., 1], cs_mean[..., 0])
+    angle = (angle * 180 / np.pi) % 360
+
+    if covariance:
+        xy_cov = torch.einsum("...hwn,...hwd,...hwk->...dk", prob, grid_xy, grid_xy)
+        xy_cov = xy_cov - torch.einsum("...d,...k->...dk", xy_mean, xy_mean)
+    else:
+        xy_cov = None
+
+    xyr_mean = torch.cat((xy_mean, angle.unsqueeze(-1)), -1)
+    return xyr_mean, xy_cov
+
+
+@torch.jit.script
+def argmax_xyr(scores: torch.Tensor) -> torch.Tensor:
+    indices = scores.flatten(-3).max(-1).indices
+    width, num_rotations = scores.shape[-2:]
+    wr = width * num_rotations
+    y = torch.div(indices, wr, rounding_mode="floor")
+    x = torch.div(indices % wr, num_rotations, rounding_mode="floor")
+    angle_index = indices % num_rotations
+    angle = angle_index * 360 / num_rotations
+    xyr = torch.stack((x, y, angle), -1)
+    return xyr
+
+
+def argmax_xyrh(scores: torch.Tensor) -> torch.Tensor:
+    # Find the maximum score and its flat index over the (x, y, rotation, height)
+    # volume, then convert the flat index back to per-dimension coordinates.
+    # Note: the index decomposition below assumes a square grid (h == w).
+    batch, h, w, num_rotations, height = scores.shape
+    max_values, max_indices = torch.max(scores.view(batch, -1), dim=1)
+    max_indices = torch.stack(
+        [
+            torch.div(max_indices % (h * num_rotations * height), num_rotations * height, rounding_mode="trunc"),
+            torch.div(max_indices, h * num_rotations * height, rounding_mode="trunc"),
+            torch.div(max_indices % (num_rotations * height), height, rounding_mode="trunc"),
+            max_indices % height,
+        ],
+        dim=1,
+    )
+    # Convert the rotation index to degrees; writing it back into the integer
+    # index tensor truncates the angle to whole degrees.
+    angle = max_indices[..., 2] * 360 / num_rotations
+    max_indices[..., 2] = angle
+    return max_indices
+
+
+@torch.jit.script
+def mask_yaw_prior(
+    scores: torch.Tensor, yaw_prior: torch.Tensor, num_rotations: int
+) -> torch.Tensor:
+    step = 360 / num_rotations
+    step_2 = step / 2
+    angles = torch.arange(step_2, 360 + step_2, step, device=scores.device)
+    yaw_init, yaw_range = yaw_prior.chunk(2, dim=-1)
+    rot_mask = angle_error(angles, yaw_init) < yaw_range
+    return scores.masked_fill_(~rot_mask[:, None, None], -np.inf)
+
+
+def fuse_gps(log_prob, uv_gps, ppm, sigma=10, gaussian=False):
+    grid = make_grid(*log_prob.shape[-3:-1][::-1]).to(log_prob)
+    dist = torch.sum((grid - uv_gps) ** 2, -1)
+    sigma_pixel = sigma * ppm
+    if gaussian:
+        gps_log_prob = -1 / 2 * dist / sigma_pixel**2
+    else:
+        gps_log_prob = torch.where(dist < sigma_pixel**2, 1, -np.inf)
+    log_prob_fused = log_softmax_spatial(log_prob + gps_log_prob.unsqueeze(-1))
+    return log_prob_fused
diff --git a/osm/analysis.py b/osm/analysis.py new file mode 100644 index 0000000000000000000000000000000000000000..a667c21373a31482f7bbcfb41d4fa14681741260 --- /dev/null +++ b/osm/analysis.py @@ -0,0 +1,182 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
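For reference, the exhaustive-matching utilities defined just above (before osm/analysis.py) are typically combined as in the sketch below. This is a minimal illustration only: the import path, tensor shapes, and all numeric values are assumptions, not taken from this diff.

import torch

# Assumed module path; adjust to wherever the functions above actually live.
from models.voting import (
    argmax_xyr, expectation_xyr, fuse_gps, log_softmax_spatial, nll_loss_xyr,
)

b, h, w, r = 2, 64, 64, 64                    # batch, map height/width, rotation bins (illustrative)
scores = torch.randn(b, h, w, r)              # raw matching scores over (x, y, yaw)
log_probs = log_softmax_spatial(scores)       # joint log-distribution over the last three dims

xyr_mode = argmax_xyr(log_probs)              # (b, 3): most likely (x, y, yaw in degrees)
xyr_mean, xy_cov = expectation_xyr(log_probs.exp(), covariance=True)  # soft estimate + 2x2 position covariance

uv_gps = torch.tensor([32.0, 32.0])           # GPS prior expressed in map pixels (hypothetical)
log_probs_fused = fuse_gps(log_probs, uv_gps, ppm=1, sigma=10)

xy_gt = torch.tensor([[20.0, 30.0], [10.0, 5.0]])   # ground-truth positions (hypothetical)
yaw_gt = torch.tensor([45.0, 180.0])                # ground-truth headings in degrees
nll = nll_loss_xyr(log_probs_fused, xy_gt, yaw_gt)  # per-sample negative log-likelihood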
+ +from collections import Counter, defaultdict +from typing import Dict + +import matplotlib.pyplot as plt +import numpy as np +import plotly.graph_objects as go + +from .parser import ( + filter_area, + filter_node, + filter_way, + match_to_group, + parse_area, + parse_node, + parse_way, + Patterns, +) +from .reader import OSMData + + +def recover_hierarchy(counter: Counter) -> Dict: + """Recover a two-level hierarchy from the flat group labels.""" + groups = defaultdict(dict) + for k, v in sorted(counter.items(), key=lambda x: -x[1]): + if ":" in k: + prefix, group = k.split(":") + if prefix in groups and isinstance(groups[prefix], int): + groups[prefix] = {} + groups[prefix][prefix] = groups[prefix] + groups[prefix] = {} + groups[prefix][group] = v + else: + groups[k] = v + return dict(groups) + + +def bar_autolabel(rects, fontsize): + """Attach a text label above each bar in *rects*, displaying its height.""" + for rect in rects: + width = rect.get_width() + plt.gca().annotate( + f"{width}", + xy=(width, rect.get_y() + rect.get_height() / 2), + xytext=(3, 0), # 3 points vertical offset + textcoords="offset points", + ha="left", + va="center", + fontsize=fontsize, + ) + + +def plot_histogram(counts, fontsize, dpi): + fig, ax = plt.subplots(dpi=dpi, figsize=(8, 20)) + + labels = [] + for k, v in counts.items(): + if isinstance(v, dict): + labels += list(v.keys()) + v = list(v.values()) + else: + labels.append(k) + v = [v] + bars = plt.barh( + len(labels) + -len(v) + np.arange(len(v)), v, height=0.9, label=k + ) + bar_autolabel(bars, fontsize) + + ax.set_yticklabels(labels, fontsize=fontsize) + ax.axes.xaxis.set_ticklabels([]) + ax.xaxis.tick_top() + ax.invert_yaxis() + plt.yticks(np.arange(len(labels))) + plt.xscale("log") + plt.legend(ncol=len(counts), loc="upper center") + + +def count_elements(elems: Dict[int, str], filter_fn, parse_fn) -> Dict: + """Count the number of elements in each group.""" + counts = Counter() + for elem in filter(filter_fn, elems.values()): + group = parse_fn(elem.tags) + if group is None: + continue + counts[group] += 1 + counts = recover_hierarchy(counts) + return counts + + +def plot_osm_histograms(osm: OSMData, fontsize=8, dpi=150): + counts = count_elements(osm.nodes, filter_node, parse_node) + plot_histogram(counts, fontsize, dpi) + plt.title("nodes") + + counts = count_elements(osm.ways, filter_way, parse_way) + plot_histogram(counts, fontsize, dpi) + plt.title("ways") + + counts = count_elements(osm.ways, filter_area, parse_area) + plot_histogram(counts, fontsize, dpi) + plt.title("areas") + + +def plot_sankey_hierarchy(osm: OSMData): + triplets = [] + for node in filter(filter_node, osm.nodes.values()): + label = parse_node(node.tags) + if label is None: + continue + group = match_to_group(label, Patterns.nodes) + if group is None: + group = match_to_group(label, Patterns.ways) + if group is None: + group = "null" + if ":" in label: + key, tag = label.split(":") + if tag == "yes": + tag = key + else: + key = tag = label + triplets.append((key, tag, group)) + keys, tags, groups = list(zip(*triplets)) + counts_key_tag = Counter(zip(keys, tags)) + counts_key_tag_group = Counter(triplets) + + key2tags = defaultdict(set) + for k, t in zip(keys, tags): + key2tags[k].add(t) + key2tags = {k: sorted(t) for k, t in key2tags.items()} + keytag2group = dict(zip(zip(keys, tags), groups)) + key_names = sorted(set(keys)) + tag_names = [(k, t) for k in key_names for t in key2tags[k]] + + group_names = [] + for k in key_names: + for t in key2tags[k]: + g = 
keytag2group[k, t] + if g not in group_names and g != "null": + group_names.append(g) + group_names += ["null"] + + key2idx = dict(zip(key_names, range(len(key_names)))) + tag2idx = {kt: i + len(key2idx) for i, kt in enumerate(tag_names)} + group2idx = {n: i + len(key2idx) + len(tag2idx) for i, n in enumerate(group_names)} + + key_counts = Counter(keys) + key_text = [f"{k} {key_counts[k]}" for k in key_names] + tag_counts = Counter(list(zip(keys, tags))) + tag_text = [f"{t} {tag_counts[k, t]}" for k, t in tag_names] + group_counts = Counter(groups) + group_text = [f"{k} {group_counts[k]}" for k in group_names] + + fig = go.Figure( + data=[ + go.Sankey( + orientation="h", + node=dict( + pad=15, + thickness=20, + line=dict(color="black", width=0.5), + label=key_text + tag_text + group_text, + x=[0] * len(key_names) + + [1] * len(tag_names) + + [2] * len(group_names), + color="blue", + ), + arrangement="fixed", + link=dict( + source=[key2idx[k] for k, _ in counts_key_tag] + + [tag2idx[k, t] for k, t, _ in counts_key_tag_group], + target=[tag2idx[k, t] for k, t in counts_key_tag] + + [group2idx[g] for _, _, g in counts_key_tag_group], + value=list(counts_key_tag.values()) + + list(counts_key_tag_group.values()), + ), + ) + ] + ) + fig.update_layout(autosize=False, width=800, height=2000, font_size=10) + fig.show() + return fig diff --git a/osm/data.py b/osm/data.py new file mode 100644 index 0000000000000000000000000000000000000000..dafc568f8ad5ac8c72ea9ffbd096838e0693ad84 --- /dev/null +++ b/osm/data.py @@ -0,0 +1,230 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. + +import logging +from dataclasses import dataclass, field +from typing import Dict, List, Optional, Set, Tuple + +import numpy as np + +from .parser import ( + filter_area, + filter_node, + filter_way, + match_to_group, + parse_area, + parse_node, + parse_way, + Patterns, +) +from .reader import OSMData, OSMNode, OSMRelation, OSMWay + + +logger = logging.getLogger(__name__) + + +def glue(ways: List[OSMWay]) -> List[List[OSMNode]]: + result: List[List[OSMNode]] = [] + to_process: Set[Tuple[OSMNode]] = set() + + for way in ways: + if way.is_cycle(): + result.append(way.nodes) + else: + to_process.add(tuple(way.nodes)) + + while to_process: + nodes: List[OSMNode] = list(to_process.pop()) + glued: Optional[List[OSMNode]] = None + other_nodes: Optional[Tuple[OSMNode]] = None + + for other_nodes in to_process: + glued = try_to_glue(nodes, list(other_nodes)) + if glued is not None: + break + + if glued is not None: + to_process.remove(other_nodes) + if is_cycle(glued): + result.append(glued) + else: + to_process.add(tuple(glued)) + else: + result.append(nodes) + + return result + + +def is_cycle(nodes: List[OSMNode]) -> bool: + """Is way a cycle way or an area boundary.""" + return nodes[0] == nodes[-1] + + +def try_to_glue(nodes: List[OSMNode], other: List[OSMNode]) -> Optional[List[OSMNode]]: + """Create new combined way if ways share endpoints.""" + if nodes[0] == other[0]: + return list(reversed(other[1:])) + nodes + if nodes[0] == other[-1]: + return other[:-1] + nodes + if nodes[-1] == other[-1]: + return nodes + list(reversed(other[:-1])) + if nodes[-1] == other[0]: + return nodes + other[1:] + return None + + +def multipolygon_from_relation(rel: OSMRelation, osm: OSMData): + inner_ways = [] + outer_ways = [] + for member in rel.members: + if member.type_ == "way": + if member.role == "inner": + if member.ref in osm.ways: + inner_ways.append(osm.ways[member.ref]) + elif member.role == "outer": + if member.ref in 
osm.ways: + outer_ways.append(osm.ways[member.ref]) + else: + logger.warning(f'Unknown member role "{member.role}".') + if outer_ways: + inners_path = glue(inner_ways) + outers_path = glue(outer_ways) + return inners_path, outers_path + + +@dataclass +class MapElement: + id_: int + label: str + group: str + tags: Optional[Dict[str, str]] + + +@dataclass +class MapNode(MapElement): + xy: np.ndarray + + @classmethod + def from_osm(cls, node: OSMNode, label: str, group: str): + return cls( + node.id_, + label, + group, + node.tags, + xy=node.xy, + ) + + +@dataclass +class MapLine(MapElement): + xy: np.ndarray + + @classmethod + def from_osm(cls, way: OSMWay, label: str, group: str): + xy = np.stack([n.xy for n in way.nodes]) + return cls( + way.id_, + label, + group, + way.tags, + xy=xy, + ) + + +@dataclass +class MapArea(MapElement): + outers: List[np.ndarray] + inners: List[np.ndarray] = field(default_factory=list) + + @classmethod + def from_relation(cls, rel: OSMRelation, label: str, group: str, osm: OSMData): + outers_inners = multipolygon_from_relation(rel, osm) + if outers_inners is None: + return None + outers, inners = outers_inners + outers = [np.stack([n.xy for n in way]) for way in outers] + inners = [np.stack([n.xy for n in way]) for way in inners] + return cls( + rel.id_, + label, + group, + rel.tags, + outers=outers, + inners=inners, + ) + + @classmethod + def from_way(cls, way: OSMWay, label: str, group: str): + xy = np.stack([n.xy for n in way.nodes]) + return cls( + way.id_, + label, + group, + way.tags, + outers=[xy], + ) + + +class MapData: + def __init__(self): + self.nodes: Dict[int, MapNode] = {} + self.lines: Dict[int, MapLine] = {} + self.areas: Dict[int, MapArea] = {} + + @classmethod + def from_osm(cls, osm: OSMData): + self = cls() + + for node in filter(filter_node, osm.nodes.values()): + label = parse_node(node.tags) + if label is None: + continue + group = match_to_group(label, Patterns.nodes) + if group is None: + group = match_to_group(label, Patterns.ways) + if group is None: + continue # missing + self.nodes[node.id_] = MapNode.from_osm(node, label, group) + + for way in filter(filter_way, osm.ways.values()): + label = parse_way(way.tags) + if label is None: + continue + group = match_to_group(label, Patterns.ways) + if group is None: + group = match_to_group(label, Patterns.nodes) + if group is None: + continue # missing + self.lines[way.id_] = MapLine.from_osm(way, label, group) + + for area in filter(filter_area, osm.ways.values()): + label = parse_area(area.tags) + if label is None: + continue + group = match_to_group(label, Patterns.areas) + if group is None: + group = match_to_group(label, Patterns.ways) + if group is None: + group = match_to_group(label, Patterns.nodes) + if group is None: + continue # missing + self.areas[area.id_] = MapArea.from_way(area, label, group) + + for rel in osm.relations.values(): + if rel.tags.get("type") != "multipolygon": + continue + label = parse_area(rel.tags) + if label is None: + continue + group = match_to_group(label, Patterns.areas) + if group is None: + group = match_to_group(label, Patterns.ways) + if group is None: + group = match_to_group(label, Patterns.nodes) + if group is None: + continue # missing + area = MapArea.from_relation(rel, label, group, osm) + assert rel.id_ not in self.areas # not sure if there can be collision + if area is not None: + self.areas[rel.id_] = area + + return self diff --git a/osm/download.py b/osm/download.py new file mode 100644 index 
0000000000000000000000000000000000000000..abaaaf4d6fb7c8c3034db8a152bbf4cff6135dc2 --- /dev/null +++ b/osm/download.py @@ -0,0 +1,119 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+
+import json
+from pathlib import Path
+from typing import Dict, Optional
+
+import urllib3
+
+
+from utils.geo import BoundaryBox
+import urllib.request
+import requests
+
+
+def get_osm(
+    boundary_box: BoundaryBox,
+    cache_path: Optional[Path] = None,
+    overwrite: bool = False,
+) -> str:
+    if not overwrite and cache_path is not None and cache_path.is_file():
+        with cache_path.open() as fp:
+            return json.load(fp)
+
+    (bottom, left), (top, right) = boundary_box.min_, boundary_box.max_
+    content: bytes = get_web_data(
+        # "https://api.openstreetmap.org/api/0.6/map.json",
+        # "https://openstreetmap.erniubot.live/api/0.6/map.json",
+        # "https://openstreetmap.erniuapi.com/api/0.6/map.json",
+        'https://osm.erniuapi.org/api/0.6/map.json',
+        # 'http://localhost:29505/api/map',
+        # "https://lz4.overpass-api.de/api/interpreter",
+        {"bbox": f"{left},{bottom},{right},{top}"},
+    )
+
+    content_str = content.decode("utf-8")
+    if content_str.startswith("You requested too many nodes"):
+        raise ValueError(content_str)
+
+    if cache_path is not None:
+        with cache_path.open("bw+") as fp:
+            fp.write(content)
+    return json.loads(content_str)
+
+
+def get_web_data(address: str, parameters: Dict[str, str]) -> bytes:
+    """Send a GET request and return the raw response, retrying on failure."""
+    # logger.info("Getting %s...", address)
+    # Optional proxy configuration:
+    # proxy_address = "http://107.173.122.186:3128"
+    # proxies = {
+    #     'http': proxy_address,
+    #     'https': proxy_address
+    # }
+    # response = requests.get(address, params=parameters, timeout=100, proxies=proxies)
+    while True:
+        try:
+            response = requests.get(address, params=parameters, timeout=100)
+            break
+        except Exception as e:
+            print(f"Request failed: {e}")
+            print("Retrying...")
+    return response.content
+
+
+# def get_web_data_2(address: str, parameters: Dict[str, str]) -> bytes:
+#     # logger.info("Getting %s...", address)
+#     proxy_address = "http://107.173.122.186:3128"
+#     http = urllib3.PoolManager(proxy_url=proxy_address)
+#     result = http.request("GET", address, parameters, timeout=100)
+#     return result.data
+#
+#
+# def get_web_data_1(address: str, parameters: Dict[str, str]) -> bytes:
+#     # Set the proxy server address and port.
+#     proxy_address = "http://107.173.122.186:3128"
+#
+#     # Create a ProxyHandler object.
+#     proxy_handler = urllib.request.ProxyHandler({'http': proxy_address})
+#
+#     # Build the query string.
+#     query_string = urllib.parse.urlencode(parameters)
+#
+#     # Build the full URL.
+#     url = address + '?'
+ query_string +# print(url) +# # 创建OpenerDirector对象,并将ProxyHandler对象作为参数传递 +# opener = urllib.request.build_opener(proxy_handler) +# +# # 使用OpenerDirector对象发送请求 +# response = opener.open(url) +# +# # 发送GET请求 +# # response = urllib.request.urlopen(url, timeout=100) +# +# # 读取响应内容 +# data = response.read() +# print() +# return data \ No newline at end of file diff --git a/osm/parser.py b/osm/parser.py new file mode 100644 index 0000000000000000000000000000000000000000..d235c71bdbdd22d280d60015b5941d25ba0345e1 --- /dev/null +++ b/osm/parser.py @@ -0,0 +1,255 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. + +import logging +import re +from typing import List + +from .reader import OSMData, OSMElement, OSMNode, OSMWay + +IGNORE_TAGS = {"source", "phone", "entrance", "inscription", "note", "name"} + + +def parse_levels(string: str) -> List[float]: + """Parse string representation of level sequence value.""" + try: + cleaned = string.replace(",", ";").replace(" ", "") + return list(map(float, cleaned.split(";"))) + except ValueError: + logging.debug("Cannot parse level description from `%s`.", string) + return [] + + +def filter_level(elem: OSMElement): + level = elem.tags.get("level") + if level is not None: + levels = parse_levels(level) + # In the US, ground floor levels are sometimes marked as level=1 + # so let's be conservative and include it. + if not (0 in levels or 1 in levels): + return False + layer = elem.tags.get("layer") + if layer is not None: + layer = parse_levels(layer) + if len(layer) > 0 and max(layer) < 0: + return False + return ( + elem.tags.get("location") != "underground" + and elem.tags.get("parking") != "underground" + ) + + +def filter_node(node: OSMNode): + return len(node.tags.keys() - IGNORE_TAGS) > 0 and filter_level(node) + + +def is_area(way: OSMWay): + if way.nodes[0] != way.nodes[-1]: + return False + if way.tags.get("area") == "no": + return False + filters = [ + "area", + "building", + "amenity", + "indoor", + "landuse", + "landcover", + "leisure", + "public_transport", + "shop", + ] + for f in filters: + if f in way.tags and way.tags.get(f) != "no": + return True + if way.tags.get("natural") in {"wood", "grassland", "water"}: + return True + return False + + +def filter_area(way: OSMWay): + return len(way.tags.keys() - IGNORE_TAGS) > 0 and is_area(way) and filter_level(way) + + +def filter_way(way: OSMWay): + return not filter_area(way) and way.tags != {} and filter_level(way) + + +def parse_node(tags): + keys = tags.keys() + for key in [ + "amenity", + "natural", + "highway", + "barrier", + "shop", + "tourism", + "public_transport", + "emergency", + "man_made", + ]: + if key in keys: + if "disused" in tags[key]: + continue + return f"{key}:{tags[key]}" + return None + + +def parse_area(tags): + if "building" in tags: + group = "building" + kind = tags["building"] + if kind == "yes": + for key in ["amenity", "tourism"]: + if key in tags: + kind = tags[key] + break + if kind != "yes": + group += f":{kind}" + return group + if "area:highway" in tags: + return f'highway:{tags["area:highway"]}' + for key in [ + "amenity", + "landcover", + "leisure", + "shop", + "highway", + "tourism", + "natural", + "waterway", + "landuse", + ]: + if key in tags: + return f"{key}:{tags[key]}" + return None + + +def parse_way(tags): + keys = tags.keys() + for key in ["highway", "barrier", "natural"]: + if key in keys: + return f"{key}:{tags[key]}" + return None + + +def match_to_group(label, patterns): + for group, pattern in patterns.items(): + if re.match(pattern, 
label): + return group + return None + + +class Patterns: + areas = dict( + building="building($|:.*?)*", + parking="amenity:parking", + playground="leisure:(playground|pitch)", + grass="(landuse:grass|landcover:grass|landuse:meadow|landuse:flowerbed|natural:grassland)", + park="leisure:(park|garden|dog_park)", + forest="(landuse:forest|natural:wood)", + water="(natural:water|waterway:*)", + ) + # + ways: road, path + # + node: fountain, bicycle_parking + + ways = dict( + fence="barrier:(fence|yes)", + wall="barrier:(wall|retaining_wall)", + hedge="barrier:hedge", + kerb="barrier:kerb", + building_outline="building($|:.*?)*", + cycleway="highway:cycleway", + path="highway:(pedestrian|footway|steps|path|corridor)", + road="highway:(motorway|trunk|primary|secondary|tertiary|service|construction|track|unclassified|residential|.*_link)", + busway="highway:busway", + tree_row="natural:tree_row", # maybe merge with node? + ) + # + nodes: bollard + + nodes = dict( + tree="natural:tree", + stone="(natural:stone|barrier:block)", + crossing="highway:crossing", + lamp="highway:street_lamp", + traffic_signal="highway:traffic_signals", + bus_stop="highway:bus_stop", + stop_sign="highway:stop", + junction="highway:motorway_junction", + bus_stop_position="public_transport:stop_position", + gate="barrier:(gate|lift_gate|swing_gate|cycle_barrier)", + bollard="barrier:bollard", + shop="(shop.*?|amenity:(bank|post_office))", + restaurant="amenity:(restaurant|fast_food)", + bar="amenity:(cafe|bar|pub|biergarten)", + pharmacy="amenity:pharmacy", + fuel="amenity:fuel", + bicycle_parking="amenity:(bicycle_parking|bicycle_rental)", + charging_station="amenity:charging_station", + parking_entrance="amenity:parking_entrance", + atm="amenity:atm", + toilets="amenity:toilets", + vending_machine="amenity:vending_machine", + fountain="amenity:fountain", + waste_basket="amenity:(waste_basket|waste_disposal)", + bench="amenity:bench", + post_box="amenity:post_box", + artwork="tourism:artwork", + recycling="amenity:recycling", + give_way="highway:give_way", + clock="amenity:clock", + fire_hydrant="emergency:fire_hydrant", + pole="man_made:(flagpole|utility_pole)", + street_cabinet="man_made:street_cabinet", + ) + # + ways: kerb + + +class Groups: + areas = list(Patterns.areas) + ways = list(Patterns.ways) + nodes = list(Patterns.nodes) + + +def group_elements(osm: OSMData): + elem2group = { + "area": {}, + "way": {}, + "node": {}, + } + + for node in filter(filter_node, osm.nodes.values()): + label = parse_node(node.tags) + if label is None: + continue + group = match_to_group(label, Patterns.nodes) + if group is None: + group = match_to_group(label, Patterns.ways) + if group is None: + continue # missing + elem2group["node"][node.id_] = group + + for way in filter(filter_way, osm.ways.values()): + label = parse_way(way.tags) + if label is None: + continue + group = match_to_group(label, Patterns.ways) + if group is None: + group = match_to_group(label, Patterns.nodes) + if group is None: + continue # missing + elem2group["way"][way.id_] = group + + for area in filter(filter_area, osm.ways.values()): + label = parse_area(area.tags) + if label is None: + continue + group = match_to_group(label, Patterns.areas) + if group is None: + group = match_to_group(label, Patterns.ways) + if group is None: + group = match_to_group(label, Patterns.nodes) + if group is None: + continue # missing + elem2group["area"][area.id_] = group + + return elem2group diff --git a/osm/raster.py b/osm/raster.py new file mode 100644 index 
0000000000000000000000000000000000000000..9e203bc50db0d873a9c8544ed0a1533c84a38df9 --- /dev/null +++ b/osm/raster.py @@ -0,0 +1,103 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. + +from typing import Dict, List + +import cv2 +import numpy as np +import torch + +from utils.geo import BoundaryBox +from .data import MapArea, MapLine, MapNode +from .parser import Groups + + +class Canvas: + def __init__(self, bbox: BoundaryBox, ppm: float): + self.bbox = bbox + self.ppm = ppm + self.scaling = bbox.size * ppm + self.w, self.h = np.ceil(self.scaling).astype(int) + self.clear() + + def clear(self): + self.raster = np.zeros((self.h, self.w), np.uint8) + + def to_uv(self, xy: np.ndarray): + xy = self.bbox.normalize(xy) + xy[..., 1] = 1 - xy[..., 1] + s = self.scaling + if isinstance(xy, torch.Tensor): + s = torch.from_numpy(s).to(xy) + return xy * s - 0.5 + + def to_xy(self, uv: np.ndarray): + s = self.scaling + if isinstance(uv, torch.Tensor): + s = torch.from_numpy(s).to(uv) + xy = (uv + 0.5) / s + xy[..., 1] = 1 - xy[..., 1] + return self.bbox.unnormalize(xy) + + def draw_polygon(self, xy: np.ndarray): + uv = self.to_uv(xy) + cv2.fillPoly(self.raster, uv[None].astype(np.int32), 255) + + def draw_multipolygon(self, xys: List[np.ndarray]): + uvs = [self.to_uv(xy).round().astype(np.int32) for xy in xys] + cv2.fillPoly(self.raster, uvs, 255) + + def draw_line(self, xy: np.ndarray, width: float = 1): + uv = self.to_uv(xy) + cv2.polylines( + self.raster, uv[None].round().astype(np.int32), False, 255, thickness=width + ) + + def draw_cell(self, xy: np.ndarray): + if not self.bbox.contains(xy): + return + uv = self.to_uv(xy) + self.raster[tuple(uv.round().astype(int).T[::-1])] = 255 + + +def render_raster_masks( + nodes: List[MapNode], + lines: List[MapLine], + areas: List[MapArea], + canvas: Canvas, +) -> Dict[str, np.ndarray]: + all_groups = Groups.areas + Groups.ways + Groups.nodes + masks = {k: np.zeros((canvas.h, canvas.w), np.uint8) for k in all_groups} + + for area in areas: + canvas.raster = masks[area.group] + outlines = area.outers + area.inners + canvas.draw_multipolygon(outlines) + if area.group == "building": + canvas.raster = masks["building_outline"] + for line in outlines: + canvas.draw_line(line) + + for line in lines: + canvas.raster = masks[line.group] + canvas.draw_line(line.xy) + + for node in nodes: + canvas.raster = masks[node.group] + canvas.draw_cell(node.xy) + + return masks + + +def mask_to_idx(group2mask: Dict[str, np.ndarray], groups: List[str]) -> np.ndarray: + masks = np.stack([group2mask[k] for k in groups]) > 0 + void = ~np.any(masks, 0) + idx = np.argmax(masks, 0) + idx = np.where(void, np.zeros_like(idx), idx + 1) # add background + return idx + + +def render_raster_map(masks: Dict[str, np.ndarray]) -> np.ndarray: + areas = mask_to_idx(masks, Groups.areas) + ways = mask_to_idx(masks, Groups.ways) + nodes = mask_to_idx(masks, Groups.nodes) + return np.stack([areas, ways, nodes]) diff --git a/osm/reader.py b/osm/reader.py new file mode 100644 index 0000000000000000000000000000000000000000..793ad1879f8b2068cd5265bed408be14719e9680 --- /dev/null +++ b/osm/reader.py @@ -0,0 +1,310 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. 
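To make the rasterization utilities defined above in osm/raster.py concrete, here is a minimal sketch of Canvas used on its own. It assumes the package layout of this diff (osm/raster.py, utils/geo.py) is importable; the coordinates are made up. In the full pipeline, render_raster_masks and render_raster_map then turn MapData elements drawn on such canvases into the 3-channel (areas, ways, nodes) label raster.

import numpy as np

from osm.raster import Canvas
from utils.geo import BoundaryBox

bbox = BoundaryBox(np.array([0.0, 0.0]), np.array([64.0, 64.0]))  # a 64 m x 64 m tile in metric XY
canvas = Canvas(bbox, ppm=2)                                      # 2 pixels per meter -> 128 x 128 raster
square = np.array([[8.0, 8.0], [24.0, 8.0], [24.0, 24.0], [8.0, 24.0]])
canvas.draw_polygon(square)                 # fills the polygon with 255 in canvas.raster (uint8, H x W)
uv = canvas.to_uv(np.array([16.0, 16.0]))   # metric XY -> pixel UV; the v axis points down
xy = canvas.to_xy(uv)                       # round-trip back to metric coordinates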
+ +import json +import re +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Dict, List, Optional + +from lxml import etree +import numpy as np + +from utils.geo import BoundaryBox, Projection + +METERS_PATTERN: re.Pattern = re.compile("^(?P\\d*\\.?\\d*)\\s*m$") +KILOMETERS_PATTERN: re.Pattern = re.compile("^(?P\\d*\\.?\\d*)\\s*km$") +MILES_PATTERN: re.Pattern = re.compile("^(?P\\d*\\.?\\d*)\\s*mi$") + + +def parse_float(string: str) -> Optional[float]: + """Parse string representation of a float or integer value.""" + try: + return float(string) + except (TypeError, ValueError): + return None + + +@dataclass(eq=False) +class OSMElement: + """ + Something with tags (string to string mapping). + """ + + id_: int + tags: Dict[str, str] + + def get_float(self, key: str) -> Optional[float]: + """Parse float from tag value.""" + if key in self.tags: + return parse_float(self.tags[key]) + return None + + def get_length(self, key: str) -> Optional[float]: + """Get length in meters.""" + if key not in self.tags: + return None + + value: str = self.tags[key] + + float_value: float = parse_float(value) + if float_value is not None: + return float_value + + for pattern, ratio in [ + (METERS_PATTERN, 1.0), + (KILOMETERS_PATTERN, 1000.0), + (MILES_PATTERN, 1609.344), + ]: + matcher: re.Match = pattern.match(value) + if matcher: + float_value: float = parse_float(matcher.group("value")) + if float_value is not None: + return float_value * ratio + + return None + + def __hash__(self) -> int: + return self.id_ + + +@dataclass(eq=False) +class OSMNode(OSMElement): + """ + OpenStreetMap node. + + See https://wiki.openstreetmap.org/wiki/Node + """ + + geo: np.ndarray + visible: Optional[str] = None + xy: Optional[np.ndarray] = None + + @classmethod + def from_dict(cls, structure: Dict[str, Any]) -> "OSMNode": + """ + Parse node from Overpass-like structure. + + :param structure: input structure + """ + return cls( + structure["id"], + structure.get("tags", {}), + geo=np.array((structure["lat"], structure["lon"])), + visible=structure.get("visible"), + ) + + +@dataclass(eq=False) +class OSMWay(OSMElement): + """ + OpenStreetMap way. + + See https://wiki.openstreetmap.org/wiki/Way + """ + + nodes: Optional[List[OSMNode]] = field(default_factory=list) + visible: Optional[str] = None + + @classmethod + def from_dict( + cls, structure: Dict[str, Any], nodes: Dict[int, OSMNode] + ) -> "OSMWay": + """ + Parse way from Overpass-like structure. + + :param structure: input structure + :param nodes: node structure + """ + return cls( + structure["id"], + structure.get("tags", {}), + [nodes[x] for x in structure["nodes"]], + visible=structure.get("visible"), + ) + + def is_cycle(self) -> bool: + """Is way a cycle way or an area boundary.""" + return self.nodes[0] == self.nodes[-1] + + def __repr__(self) -> str: + return f"Way <{self.id_}> {self.nodes}" + + +@dataclass +class OSMMember: + """ + Member of OpenStreetMap relation. + """ + + type_: str + ref: int + role: str + + +@dataclass(eq=False) +class OSMRelation(OSMElement): + """ + OpenStreetMap relation. + + See https://wiki.openstreetmap.org/wiki/Relation + """ + + members: Optional[List[OSMMember]] + visible: Optional[str] = None + + @classmethod + def from_dict(cls, structure: Dict[str, Any]) -> "OSMRelation": + """ + Parse relation from Overpass-like structure. 
+ + :param structure: input structure + """ + return cls( + structure["id"], + structure["tags"], + [OSMMember(x["type"], x["ref"], x["role"]) for x in structure["members"]], + visible=structure.get("visible"), + ) + + +class OSMData: + """ + The whole OpenStreetMap information about nodes, ways, and relations. + """ + + def __init__(self) -> None: + self.nodes: Dict[int, OSMNode] = {} + self.ways: Dict[int, OSMWay] = {} + self.relations: Dict[int, OSMRelation] = {} + self.box: BoundaryBox = None + + @classmethod + def from_dict(cls, structure: Dict[str, Any]): + data = cls() + bounds = structure.get("bounds") + if bounds is not None: + data.box = BoundaryBox( + np.array([bounds["minlat"], bounds["minlon"]]), + np.array([bounds["maxlat"], bounds["maxlon"]]), + ) + + for element in structure["elements"]: + if element["type"] == "node": + node = OSMNode.from_dict(element) + data.add_node(node) + for element in structure["elements"]: + if element["type"] == "way": + way = OSMWay.from_dict(element, data.nodes) + data.add_way(way) + for element in structure["elements"]: + if element["type"] == "relation": + relation = OSMRelation.from_dict(element) + data.add_relation(relation) + + return data + + @classmethod + def from_json(cls, path: Path): + with path.open(encoding='utf-8') as fid: + structure = json.load(fid) + return cls.from_dict(structure) + + @classmethod + def from_xml(cls, path: Path): + root = etree.parse(str(path)).getroot() + structure = {"elements": []} + from tqdm import tqdm + + for elem in tqdm(root): + if elem.tag == "bounds": + structure["bounds"] = { + k: float(elem.attrib[k]) + for k in ("minlon", "minlat", "maxlon", "maxlat") + } + elif elem.tag in {"node", "way", "relation"}: + if elem.tag == "node": + item = { + "id": int(elem.attrib["id"]), + "lat": float(elem.attrib["lat"]), + "lon": float(elem.attrib["lon"]), + "visible": elem.attrib.get("visible"), + "tags": { + x.attrib["k"]: x.attrib["v"] for x in elem if x.tag == "tag" + }, + } + elif elem.tag == "way": + item = { + "id": int(elem.attrib["id"]), + "visible": elem.attrib.get("visible"), + "tags": { + x.attrib["k"]: x.attrib["v"] for x in elem if x.tag == "tag" + }, + "nodes": [int(x.attrib["ref"]) for x in elem if x.tag == "nd"], + } + elif elem.tag == "relation": + item = { + "id": int(elem.attrib["id"]), + "visible": elem.attrib.get("visible"), + "tags": { + x.attrib["k"]: x.attrib["v"] for x in elem if x.tag == "tag" + }, + "members": [ + { + "type": x.attrib["type"], + "ref": int(x.attrib["ref"]), + "role": x.attrib["role"], + } + for x in elem + if x.tag == "member" + ], + } + item["type"] = elem.tag + structure["elements"].append(item) + elem.clear() + del root + return cls.from_dict(structure) + + @classmethod + def from_file(cls, path: Path): + ext = path.suffix + if ext == ".json": + return cls.from_json(path) + elif ext in {".osm", ".xml"}: + return cls.from_xml(path) + else: + raise ValueError(f"Unknown extension for {path}") + + def add_node(self, node: OSMNode): + """Add node and update map parameters.""" + if node.id_ in self.nodes: + raise ValueError(f"Node with duplicate id {node.id_}.") + self.nodes[node.id_] = node + + def add_way(self, way: OSMWay): + """Add way and update map parameters.""" + if way.id_ in self.ways: + raise ValueError(f"Way with duplicate id {way.id_}.") + self.ways[way.id_] = way + + def add_relation(self, relation: OSMRelation): + """Add relation and update map parameters.""" + if relation.id_ in self.relations: + raise ValueError(f"Relation with duplicate id 
{relation.id_}.") + self.relations[relation.id_] = relation + + def add_xy_to_nodes(self, proj: Projection): + nodes = list(self.nodes.values()) + if len(nodes) == 0: + return + geos = np.stack([n.geo for n in nodes], 0) + if proj.bounds is not None: + # For some reasons few nodes are sometimes very far off the initial bbox. + valid = proj.bounds.contains(geos) + if valid.mean() < 0.9: + print("Many nodes are out of the projection bounds.") + xys = np.zeros_like(geos) + xys[valid] = proj.project(geos[valid]) + else: + xys = proj.project(geos) + for xy, node in zip(xys, nodes): + node.xy = xy diff --git a/osm/tiling.py b/osm/tiling.py new file mode 100644 index 0000000000000000000000000000000000000000..7badfdc480370752f8a8ad82922599a5357b0119 --- /dev/null +++ b/osm/tiling.py @@ -0,0 +1,310 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. + +import io +import pickle +from pathlib import Path +from typing import Dict, List, Optional, Tuple + +import numpy as np +from PIL import Image +import rtree + +from utils.geo import BoundaryBox, Projection +from .data import MapData +from .download import get_osm +from .parser import Groups +from .raster import Canvas, render_raster_map, render_raster_masks +from .reader import OSMData, OSMNode, OSMWay + + +class MapIndex: + def __init__( + self, + data: MapData, + ): + self.index_nodes = rtree.index.Index() + for i, node in data.nodes.items(): + self.index_nodes.insert(i, tuple(node.xy) * 2) + + self.index_lines = rtree.index.Index() + for i, line in data.lines.items(): + bbox = tuple(np.r_[line.xy.min(0), line.xy.max(0)]) + self.index_lines.insert(i, bbox) + + self.index_areas = rtree.index.Index() + for i, area in data.areas.items(): + xy = np.concatenate(area.outers + area.inners) + bbox = tuple(np.r_[xy.min(0), xy.max(0)]) + self.index_areas.insert(i, bbox) + + self.data = data + + def query(self, bbox: BoundaryBox) -> Tuple[List[OSMNode], List[OSMWay]]: + query = tuple(np.r_[bbox.min_, bbox.max_]) + ret = [] + for x in ["nodes", "lines", "areas"]: + ids = getattr(self, "index_" + x).intersection(query) + ret.append([getattr(self.data, x)[i] for i in ids]) + return tuple(ret) + + +def bbox_to_slice(bbox: BoundaryBox, canvas: Canvas): + uv_min = np.ceil(canvas.to_uv(bbox.min_)).astype(int) + uv_max = np.ceil(canvas.to_uv(bbox.max_)).astype(int) + slice_ = (slice(uv_max[1], uv_min[1]), slice(uv_min[0], uv_max[0])) + return slice_ + + +def round_bbox(bbox: BoundaryBox, origin: np.ndarray, ppm: int): + bbox = bbox.translate(-origin) + bbox = BoundaryBox(np.round(bbox.min_ * ppm) / ppm, np.round(bbox.max_ * ppm) / ppm) + return bbox.translate(origin) + +class MapTileManager: + def __init__( + self, + osmpath:Path, + ): + + self.osm = OSMData.from_file(osmpath) + + + # @classmethod + def from_bbox( + self, + projection: Projection, + bbox: BoundaryBox, + ppm: int, + tile_size: int = 128, + ): + # bbox_osm = projection.unproject(bbox) + # if path is not None and path.is_file(): + # print(OSMData.from_file) + # osm = OSMData.from_file(path) + # if osm.box is not None: + # assert osm.box.contains(bbox_osm) + # else: + # osm = OSMData.from_dict(get_osm(bbox_osm, path)) + + self.osm.add_xy_to_nodes(projection) + map_data = MapData.from_osm(self.osm) + map_index = MapIndex(map_data) + + bounds_x, bounds_y = [ + np.r_[np.arange(min_, max_, tile_size), max_] + for min_, max_ in zip(bbox.min_, bbox.max_) + ] + bbox_tiles = {} + for i, xmin in enumerate(bounds_x[:-1]): + for j, ymin in enumerate(bounds_y[:-1]): + bbox_tiles[i, j] = BoundaryBox( + [xmin, 
ymin], [bounds_x[i + 1], bounds_y[j + 1]] + ) + + tiles = {} + for ij, bbox_tile in bbox_tiles.items(): + canvas = Canvas(bbox_tile, ppm) + nodes, lines, areas = map_index.query(bbox_tile) + masks = render_raster_masks(nodes, lines, areas, canvas) + canvas.raster = render_raster_map(masks) + tiles[ij] = canvas + + groups = {k: v for k, v in vars(Groups).items() if not k.startswith("__")} + + self.origin = bbox.min_ + self.bbox = bbox + self.tiles = tiles + self.tile_size = tile_size + self.ppm = ppm + self.projection = projection + self.groups = groups + self.map_data = map_data + + return self.query(bbox) + # return cls(tiles, bbox, tile_size, ppm, projection, groups, map_data) + + def query(self, bbox: BoundaryBox) -> Canvas: + bbox = round_bbox(bbox, self.bbox.min_, self.ppm) + canvas = Canvas(bbox, self.ppm) + raster = np.zeros((3, canvas.h, canvas.w), np.uint8) + + bbox_all = bbox & self.bbox + ij_min = np.floor((bbox_all.min_ - self.origin) / self.tile_size).astype(int) + ij_max = np.ceil((bbox_all.max_ - self.origin) / self.tile_size).astype(int) - 1 + for i in range(ij_min[0], ij_max[0] + 1): + for j in range(ij_min[1], ij_max[1] + 1): + tile = self.tiles[i, j] + bbox_select = tile.bbox & bbox + slice_query = bbox_to_slice(bbox_select, canvas) + slice_tile = bbox_to_slice(bbox_select, tile) + raster[(slice(None),) + slice_query] = tile.raster[ + (slice(None),) + slice_tile + ] + canvas.raster = raster + return canvas + + def save(self, path: Path): + dump = { + "bbox": self.bbox.format(), + "tile_size": self.tile_size, + "ppm": self.ppm, + "groups": self.groups, + "tiles_bbox": {}, + "tiles_raster": {}, + } + if self.projection is not None: + dump["ref_latlonalt"] = self.projection.latlonalt + for ij, canvas in self.tiles.items(): + dump["tiles_bbox"][ij] = canvas.bbox.format() + raster_bytes = io.BytesIO() + raster = Image.fromarray(canvas.raster.transpose(1, 2, 0).astype(np.uint8)) + raster.save(raster_bytes, format="PNG") + dump["tiles_raster"][ij] = raster_bytes + with open(path, "wb") as fp: + pickle.dump(dump, fp) + + @classmethod + def load(cls, path: Path): + with path.open("rb") as fp: + dump = pickle.load(fp) + tiles = {} + for ij, bbox in dump["tiles_bbox"].items(): + tiles[ij] = Canvas(BoundaryBox.from_string(bbox), dump["ppm"]) + raster = np.asarray(Image.open(dump["tiles_raster"][ij])) + tiles[ij].raster = raster.transpose(2, 0, 1).copy() + projection = Projection(*dump["ref_latlonalt"]) + return cls( + tiles, + BoundaryBox.from_string(dump["bbox"]), + dump["tile_size"], + dump["ppm"], + projection, + dump["groups"], + ) + +class TileManager: + def __init__( + self, + tiles: Dict, + bbox: BoundaryBox, + tile_size: int, + ppm: int, + projection: Projection, + groups: Dict[str, List[str]], + map_data: Optional[MapData] = None, + ): + self.origin = bbox.min_ + self.bbox = bbox + self.tiles = tiles + self.tile_size = tile_size + self.ppm = ppm + self.projection = projection + self.groups = groups + self.map_data = map_data + assert np.all(tiles[0, 0].bbox.min_ == self.origin) + for tile in tiles.values(): + assert bbox.contains(tile.bbox) + + @classmethod + def from_bbox( + cls, + projection: Projection, + bbox: BoundaryBox, + ppm: float, + path: Optional[Path] = None, + tile_size: int = 128, + ): + bbox_osm = projection.unproject(bbox) + if path is not None and path.is_file(): + print(OSMData.from_file) + osm = OSMData.from_file(path) + if osm.box is not None: + assert osm.box.contains(bbox_osm) + else: + osm = OSMData.from_dict(get_osm(bbox_osm, path)) + + 
osm.add_xy_to_nodes(projection) + map_data = MapData.from_osm(osm) + map_index = MapIndex(map_data) + + bounds_x, bounds_y = [ + np.r_[np.arange(min_, max_, tile_size), max_] + for min_, max_ in zip(bbox.min_, bbox.max_) + ] + bbox_tiles = {} + for i, xmin in enumerate(bounds_x[:-1]): + for j, ymin in enumerate(bounds_y[:-1]): + bbox_tiles[i, j] = BoundaryBox( + [xmin, ymin], [bounds_x[i + 1], bounds_y[j + 1]] + ) + + tiles = {} + for ij, bbox_tile in bbox_tiles.items(): + canvas = Canvas(bbox_tile, ppm) + nodes, lines, areas = map_index.query(bbox_tile) + masks = render_raster_masks(nodes, lines, areas, canvas) + canvas.raster = render_raster_map(masks) + tiles[ij] = canvas + + groups = {k: v for k, v in vars(Groups).items() if not k.startswith("__")} + + return cls(tiles, bbox, tile_size, ppm, projection, groups, map_data) + + def query(self, bbox: BoundaryBox) -> Canvas: + bbox = round_bbox(bbox, self.bbox.min_, self.ppm) + canvas = Canvas(bbox, self.ppm) + raster = np.zeros((3, canvas.h, canvas.w), np.uint8) + + bbox_all = bbox & self.bbox + ij_min = np.floor((bbox_all.min_ - self.origin) / self.tile_size).astype(int) + ij_max = np.ceil((bbox_all.max_ - self.origin) / self.tile_size).astype(int) - 1 + for i in range(ij_min[0], ij_max[0] + 1): + for j in range(ij_min[1], ij_max[1] + 1): + tile = self.tiles[i, j] + bbox_select = tile.bbox & bbox + slice_query = bbox_to_slice(bbox_select, canvas) + slice_tile = bbox_to_slice(bbox_select, tile) + raster[(slice(None),) + slice_query] = tile.raster[ + (slice(None),) + slice_tile + ] + canvas.raster = raster + return canvas + + def save(self, path: Path): + dump = { + "bbox": self.bbox.format(), + "tile_size": self.tile_size, + "ppm": self.ppm, + "groups": self.groups, + "tiles_bbox": {}, + "tiles_raster": {}, + } + if self.projection is not None: + dump["ref_latlonalt"] = self.projection.latlonalt + for ij, canvas in self.tiles.items(): + dump["tiles_bbox"][ij] = canvas.bbox.format() + raster_bytes = io.BytesIO() + raster = Image.fromarray(canvas.raster.transpose(1, 2, 0).astype(np.uint8)) + raster.save(raster_bytes, format="PNG") + dump["tiles_raster"][ij] = raster_bytes + with open(path, "wb") as fp: + pickle.dump(dump, fp) + + @classmethod + def load(cls, path: Path): + with path.open("rb") as fp: + dump = pickle.load(fp) + tiles = {} + for ij, bbox in dump["tiles_bbox"].items(): + tiles[ij] = Canvas(BoundaryBox.from_string(bbox), dump["ppm"]) + raster = np.asarray(Image.open(dump["tiles_raster"][ij])) + tiles[ij].raster = raster.transpose(2, 0, 1).copy() + projection = Projection(*dump["ref_latlonalt"]) + return cls( + tiles, + BoundaryBox.from_string(dump["bbox"]), + dump["tile_size"], + dump["ppm"], + projection, + dump["groups"], + ) diff --git a/osm/viz.py b/osm/viz.py new file mode 100644 index 0000000000000000000000000000000000000000..cd99e3eda0049c2aae35d397018db73b2eb661ae --- /dev/null +++ b/osm/viz.py @@ -0,0 +1,159 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. 
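A minimal usage sketch for the TileManager defined above in osm/tiling.py. The reference coordinates, cache path, and tile parameters are assumptions; from_bbox needs either a previously downloaded OSM extract covering the area (passed as path) or network access to the endpoint hard-coded in osm/download.py.

from pathlib import Path

import numpy as np

from osm.tiling import TileManager
from utils.geo import BoundaryBox, Projection

proj = Projection(48.8566, 2.3522)   # topocentric projection around a reference lat/lon (illustrative)
bbox = BoundaryBox(np.array([-128.0, -128.0]), np.array([128.0, 128.0]))  # 256 m x 256 m around the reference

# osm_cache.json is a hypothetical cached OSM extract; without it, the OSM API is queried.
tiles = TileManager.from_bbox(proj, bbox, ppm=2, path=Path("osm_cache.json"))

canvas = tiles.query(BoundaryBox(np.array([-64.0, -64.0]), np.array([64.0, 64.0])))
raster = canvas.raster               # (3, 256, 256) uint8: area, way, and node class indices

tiles.save(Path("tiles.pkl"))        # pickle with PNG-compressed tile rasters
tiles = TileManager.load(Path("tiles.pkl"))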
+ +import matplotlib as mpl +import matplotlib.pyplot as plt +import numpy as np +import plotly.graph_objects as go +import PIL.Image + +from utils.viz_2d import add_text +from .parser import Groups + + +class GeoPlotter: + def __init__(self, zoom=12, **kwargs): + self.fig = go.Figure() + self.fig.update_layout( + mapbox_style="open-street-map", + autosize=True, + mapbox_zoom=zoom, + margin={"r": 0, "t": 0, "l": 0, "b": 0}, + showlegend=True, + **kwargs, + ) + + def points(self, latlons, color, text=None, name=None, size=5, **kwargs): + latlons = np.asarray(latlons) + self.fig.add_trace( + go.Scattermapbox( + lat=latlons[..., 0], + lon=latlons[..., 1], + mode="markers", + text=text, + marker_color=color, + marker_size=size, + name=name, + **kwargs, + ) + ) + center = latlons.reshape(-1, 2).mean(0) + self.fig.update_layout( + mapbox_center=dict(zip(("lat", "lon"), center)), + ) + + def bbox(self, bbox, color, name=None, **kwargs): + corners = np.stack( + [bbox.min_, bbox.left_top, bbox.max_, bbox.right_bottom, bbox.min_] + ) + self.fig.add_trace( + go.Scattermapbox( + lat=corners[:, 0], + lon=corners[:, 1], + mode="lines", + marker_color=color, + name=name, + **kwargs, + ) + ) + self.fig.update_layout( + mapbox_center=dict(zip(("lat", "lon"), bbox.center)), + ) + + def raster(self, raster, bbox, below="traces", **kwargs): + if not np.issubdtype(raster.dtype, np.integer): + raster = (raster * 255).astype(np.uint8) + raster = PIL.Image.fromarray(raster) + corners = np.stack( + [ + bbox.min_, + bbox.left_top, + bbox.max_, + bbox.right_bottom, + ] + )[::-1, ::-1] + layers = [*self.fig.layout.mapbox.layers] + layers.append( + dict( + sourcetype="image", + source=raster, + coordinates=corners, + below=below, + **kwargs, + ) + ) + self.fig.layout.mapbox.layers = layers + + +map_colors = { + "building": (84, 155, 255), + "parking": (255, 229, 145), + "playground": (150, 133, 125), + "grass": (188, 255, 143), + "park": (0, 158, 16), + "forest": (0, 92, 9), + "water": (184, 213, 255), + "fence": (238, 0, 255), + "wall": (0, 0, 0), + "hedge": (107, 68, 48), + "kerb": (255, 234, 0), + "building_outline": (0, 0, 255), + "cycleway": (0, 251, 255), + "path": (8, 237, 0), + "road": (255, 0, 0), + "tree_row": (0, 92, 9), + "busway": (255, 128, 0), + "void": [int(255 * 0.9)] * 3, +} + + +class Colormap: + colors_areas = np.stack([map_colors[k] for k in ["void"] + Groups.areas]) + colors_ways = np.stack([map_colors[k] for k in ["void"] + Groups.ways]) + + @classmethod + def apply(cls, rasters): + return ( + np.where( + rasters[1, ..., None] > 0, + cls.colors_ways[rasters[1]], + cls.colors_areas[rasters[0]], + ) + / 255.0 + ) + + @classmethod + def add_colorbar(cls): + ax2 = plt.gcf().add_axes([1, 0.1, 0.02, 0.8]) + color_list = np.r_[cls.colors_areas[1:], cls.colors_ways[1:]] / 255.0 + cmap = mpl.colors.ListedColormap(color_list[::-1]) + ticks = np.linspace(0, 1, len(color_list), endpoint=False) + ticks += 1 / len(color_list) / 2 + cb = mpl.colorbar.ColorbarBase( + ax2, + cmap=cmap, + orientation="vertical", + ticks=ticks, + ) + cb.set_ticklabels((Groups.areas + Groups.ways)[::-1]) + ax2.tick_params(labelsize=15) + + +def plot_nodes(idx, raster, fontsize=8, size=15): + ax = plt.gcf().axes[idx] + ax.autoscale(enable=False) + nodes_xy = np.stack(np.where(raster > 0)[::-1], -1) + nodes_val = raster[tuple(nodes_xy.T[::-1])] - 1 + ax.scatter(*nodes_xy.T, c="k", s=size) + for xy, val in zip(nodes_xy, nodes_val): + group = Groups.nodes[val] + add_text( + idx, + group, + xy + 2, + lcolor=None, + fs=fontsize, + 
color="k", + normalized=False, + ha="center", + ) + plt.show() diff --git a/utils/exif.py b/utils/exif.py new file mode 100644 index 0000000000000000000000000000000000000000..c272019b5673dc1e3aab8a3e0e21b630cc629154 --- /dev/null +++ b/utils/exif.py @@ -0,0 +1,356 @@ +"""Copied from opensfm.exif to minimize hard dependencies.""" +from pathlib import Path +import json +import datetime +import logging +from codecs import encode, decode +from typing import Any, Dict, Optional, Tuple + +import exifread + +logger: logging.Logger = logging.getLogger(__name__) + +inch_in_mm = 25.4 +cm_in_mm = 10 +um_in_mm = 0.001 +default_projection = "perspective" +maximum_altitude = 1e4 + + +def sensor_data(): + with (Path(__file__).parent / "sensor_data.json").open() as fid: + data = json.load(fid) + return {k.lower(): v for k, v in data.items()} + + +def eval_frac(value) -> Optional[float]: + try: + return float(value.num) / float(value.den) + except ZeroDivisionError: + return None + + +def gps_to_decimal(values, reference) -> Optional[float]: + sign = 1 if reference in "NE" else -1 + degrees = eval_frac(values[0]) + minutes = eval_frac(values[1]) + seconds = eval_frac(values[2]) + if degrees is not None and minutes is not None and seconds is not None: + return sign * (degrees + minutes / 60 + seconds / 3600) + return None + + +def get_tag_as_float(tags, key, index: int = 0) -> Optional[float]: + if key in tags: + val = tags[key].values[index] + if isinstance(val, exifread.utils.Ratio): + ret_val = eval_frac(val) + if ret_val is None: + logger.error( + 'The rational "{2}" of tag "{0:s}" at index {1:d} c' + "aused a division by zero error".format(key, index, val) + ) + return ret_val + else: + return float(val) + else: + return None + + +def compute_focal( + focal_35: Optional[float], focal: Optional[float], sensor_width, sensor_string +) -> Tuple[float, float]: + if focal_35 is not None and focal_35 > 0: + focal_ratio = focal_35 / 36.0 # 35mm film produces 36x24mm pictures. 
+ else: + if not sensor_width: + sensor_width = sensor_data().get(sensor_string, None) + if sensor_width and focal: + focal_ratio = focal / sensor_width + focal_35 = 36.0 * focal_ratio + else: + focal_35 = 0.0 + focal_ratio = 0.0 + return focal_35, focal_ratio + + +def sensor_string(make: str, model: str) -> str: + if make != "unknown": + # remove duplicate 'make' information in 'model' + model = model.replace(make, "") + return (make.strip() + " " + model.strip()).strip().lower() + + +def unescape_string(s) -> str: + return decode(encode(s, "latin-1", "backslashreplace"), "unicode-escape") + + +class EXIF: + def __init__( + self, fileobj, image_size_loader, use_exif_size=True, name=None + ) -> None: + self.image_size_loader = image_size_loader + self.use_exif_size = use_exif_size + self.fileobj = fileobj + self.tags = exifread.process_file(fileobj, details=False) + fileobj.seek(0) + self.fileobj_name = self.fileobj.name if name is None else name + + def extract_image_size(self) -> Tuple[int, int]: + if ( + self.use_exif_size + and "EXIF ExifImageWidth" in self.tags + and "EXIF ExifImageLength" in self.tags + ): + width, height = ( + int(self.tags["EXIF ExifImageWidth"].values[0]), + int(self.tags["EXIF ExifImageLength"].values[0]), + ) + elif ( + self.use_exif_size + and "Image ImageWidth" in self.tags + and "Image ImageLength" in self.tags + ): + width, height = ( + int(self.tags["Image ImageWidth"].values[0]), + int(self.tags["Image ImageLength"].values[0]), + ) + else: + height, width = self.image_size_loader() + return width, height + + def _decode_make_model(self, value) -> str: + """Python 2/3 compatible decoding of make/model field.""" + if hasattr(value, "decode"): + try: + return value.decode("utf-8") + except UnicodeDecodeError: + return "unknown" + else: + return value + + def extract_make(self) -> str: + # Camera make and model + if "EXIF LensMake" in self.tags: + make = self.tags["EXIF LensMake"].values + elif "Image Make" in self.tags: + make = self.tags["Image Make"].values + else: + make = "unknown" + return self._decode_make_model(make) + + def extract_model(self) -> str: + if "EXIF LensModel" in self.tags: + model = self.tags["EXIF LensModel"].values + elif "Image Model" in self.tags: + model = self.tags["Image Model"].values + else: + model = "unknown" + return self._decode_make_model(model) + + def extract_focal(self) -> Tuple[float, float]: + make, model = self.extract_make(), self.extract_model() + focal_35, focal_ratio = compute_focal( + get_tag_as_float(self.tags, "EXIF FocalLengthIn35mmFilm"), + get_tag_as_float(self.tags, "EXIF FocalLength"), + self.extract_sensor_width(), + sensor_string(make, model), + ) + return focal_35, focal_ratio + + def extract_sensor_width(self) -> Optional[float]: + """Compute sensor with from width and resolution.""" + if ( + "EXIF FocalPlaneResolutionUnit" not in self.tags + or "EXIF FocalPlaneXResolution" not in self.tags + ): + return None + resolution_unit = self.tags["EXIF FocalPlaneResolutionUnit"].values[0] + mm_per_unit = self.get_mm_per_unit(resolution_unit) + if not mm_per_unit: + return None + pixels_per_unit = get_tag_as_float(self.tags, "EXIF FocalPlaneXResolution") + if pixels_per_unit is None: + return None + if pixels_per_unit <= 0.0: + pixels_per_unit = get_tag_as_float(self.tags, "EXIF FocalPlaneYResolution") + if pixels_per_unit is None or pixels_per_unit <= 0.0: + return None + units_per_pixel = 1 / pixels_per_unit + width_in_pixels = self.extract_image_size()[0] + return width_in_pixels * units_per_pixel * 
mm_per_unit + + def get_mm_per_unit(self, resolution_unit) -> Optional[float]: + """Length of a resolution unit in millimeters. + + Uses the values from the EXIF specs in + https://www.sno.phy.queensu.ca/~phil/exiftool/TagNames/EXIF.html + + Args: + resolution_unit: the resolution unit value given in the EXIF + """ + if resolution_unit == 2: # inch + return inch_in_mm + elif resolution_unit == 3: # cm + return cm_in_mm + elif resolution_unit == 4: # mm + return 1 + elif resolution_unit == 5: # um + return um_in_mm + else: + logger.warning( + "Unknown EXIF resolution unit value: {}".format(resolution_unit) + ) + return None + + def extract_orientation(self) -> int: + orientation = 1 + if "Image Orientation" in self.tags: + value = self.tags.get("Image Orientation").values[0] + if type(value) == int and value != 0: + orientation = value + return orientation + + def extract_ref_lon_lat(self) -> Tuple[str, str]: + if "GPS GPSLatitudeRef" in self.tags: + reflat = self.tags["GPS GPSLatitudeRef"].values + else: + reflat = "N" + if "GPS GPSLongitudeRef" in self.tags: + reflon = self.tags["GPS GPSLongitudeRef"].values + else: + reflon = "E" + return reflon, reflat + + def extract_lon_lat(self) -> Tuple[Optional[float], Optional[float]]: + if "GPS GPSLatitude" in self.tags: + reflon, reflat = self.extract_ref_lon_lat() + lat = gps_to_decimal(self.tags["GPS GPSLatitude"].values, reflat) + lon = gps_to_decimal(self.tags["GPS GPSLongitude"].values, reflon) + else: + lon, lat = None, None + return lon, lat + + def extract_altitude(self) -> Optional[float]: + if "GPS GPSAltitude" in self.tags: + alt_value = self.tags["GPS GPSAltitude"].values[0] + if isinstance(alt_value, exifread.utils.Ratio): + altitude = eval_frac(alt_value) + elif isinstance(alt_value, int): + altitude = float(alt_value) + else: + altitude = None + + # Check if GPSAltitudeRef is equal to 1, which means GPSAltitude should be negative, reference: http://www.exif.org/Exif2-2.PDF#page=53 + if ( + "GPS GPSAltitudeRef" in self.tags + and self.tags["GPS GPSAltitudeRef"].values[0] == 1 + and altitude is not None + ): + altitude = -altitude + else: + altitude = None + return altitude + + def extract_dop(self) -> Optional[float]: + if "GPS GPSDOP" in self.tags: + return eval_frac(self.tags["GPS GPSDOP"].values[0]) + return None + + def extract_geo(self) -> Dict[str, Any]: + altitude = self.extract_altitude() + dop = self.extract_dop() + lon, lat = self.extract_lon_lat() + d = {} + + if lon is not None and lat is not None: + d["latitude"] = lat + d["longitude"] = lon + if altitude is not None: + d["altitude"] = min([maximum_altitude, altitude]) + if dop is not None: + d["dop"] = dop + return d + + def extract_capture_time(self) -> float: + if ( + "GPS GPSDate" in self.tags + and "GPS GPSTimeStamp" in self.tags # Actually GPSDateStamp + ): + try: + hours_f = get_tag_as_float(self.tags, "GPS GPSTimeStamp", 0) + minutes_f = get_tag_as_float(self.tags, "GPS GPSTimeStamp", 1) + if hours_f is None or minutes_f is None: + raise TypeError + hours = int(hours_f) + minutes = int(minutes_f) + seconds = get_tag_as_float(self.tags, "GPS GPSTimeStamp", 2) + gps_timestamp_string = "{0:s} {1:02d}:{2:02d}:{3:02f}".format( + self.tags["GPS GPSDate"].values, hours, minutes, seconds + ) + return ( + datetime.datetime.strptime( + gps_timestamp_string, "%Y:%m:%d %H:%M:%S.%f" + ) + - datetime.datetime(1970, 1, 1) + ).total_seconds() + except (TypeError, ValueError): + logger.info( + 'The GPS time stamp in image file "{0:s}" is invalid. 
' + "Falling back to DateTime*".format(self.fileobj_name) + ) + + time_strings = [ + ("EXIF DateTimeOriginal", "EXIF SubSecTimeOriginal", "EXIF Tag 0x9011"), + ("EXIF DateTimeDigitized", "EXIF SubSecTimeDigitized", "EXIF Tag 0x9012"), + ("Image DateTime", "Image SubSecTime", "Image Tag 0x9010"), + ] + for datetime_tag, subsec_tag, offset_tag in time_strings: + if datetime_tag in self.tags: + date_time = self.tags[datetime_tag].values + if subsec_tag in self.tags: + subsec_time = self.tags[subsec_tag].values + else: + subsec_time = "0" + try: + s = "{0:s}.{1:s}".format(date_time, subsec_time) + d = datetime.datetime.strptime(s, "%Y:%m:%d %H:%M:%S.%f") + except ValueError: + logger.debug( + 'The "{1:s}" time stamp or "{2:s}" tag is invalid in ' + 'image file "{0:s}"'.format( + self.fileobj_name, datetime_tag, subsec_tag + ) + ) + continue + # Test for OffsetTimeOriginal | OffsetTimeDigitized | OffsetTime + if offset_tag in self.tags: + offset_time = self.tags[offset_tag].values + try: + d += datetime.timedelta( + hours=-int(offset_time[0:3]), minutes=int(offset_time[4:6]) + ) + except (TypeError, ValueError): + logger.debug( + 'The "{0:s}" time zone offset in image file "{1:s}"' + " is invalid".format(offset_tag, self.fileobj_name) + ) + logger.debug( + 'Naively assuming UTC on "{0:s}" in image file ' + '"{1:s}"'.format(datetime_tag, self.fileobj_name) + ) + else: + logger.debug( + "No GPS time stamp and no time zone offset in image " + 'file "{0:s}"'.format(self.fileobj_name) + ) + logger.debug( + 'Naively assuming UTC on "{0:s}" in image file "{1:s}"'.format( + datetime_tag, self.fileobj_name + ) + ) + return (d - datetime.datetime(1970, 1, 1)).total_seconds() + logger.info( + 'Image file "{0:s}" has no valid time stamp'.format(self.fileobj_name) + ) + return 0.0 diff --git a/utils/geo.py b/utils/geo.py new file mode 100644 index 0000000000000000000000000000000000000000..7f97e501f81092b57fc3ad043aa520779b44faeb --- /dev/null +++ b/utils/geo.py @@ -0,0 +1,130 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. 
+ +from typing import Union + +import numpy as np +import torch + +from .geo_opensfm import TopocentricConverter + + +class BoundaryBox: + def __init__(self, min_: np.ndarray, max_: np.ndarray): + self.min_ = np.asarray(min_) + self.max_ = np.asarray(max_) + assert np.all(self.min_ <= self.max_) + + @classmethod + def from_string(cls, string: str): + return cls(*np.split(np.array(string.split(","), float), 2)) + + @property + def left_top(self): + return np.stack([self.min_[..., 0], self.max_[..., 1]], -1) + + @property + def right_bottom(self) -> (np.ndarray, np.ndarray): + return np.stack([self.max_[..., 0], self.min_[..., 1]], -1) + + @property + def center(self) -> np.ndarray: + return (self.min_ + self.max_) / 2 + + @property + def size(self) -> np.ndarray: + return self.max_ - self.min_ + + def translate(self, t: float): + return self.__class__(self.min_ + t, self.max_ + t) + + def contains(self, xy: Union[np.ndarray, "BoundaryBox"]): + if isinstance(xy, self.__class__): + return self.contains(xy.min_) and self.contains(xy.max_) + return np.all((xy >= self.min_) & (xy <= self.max_), -1) + + def normalize(self, xy): + min_, max_ = self.min_, self.max_ + if isinstance(xy, torch.Tensor): + min_ = torch.from_numpy(min_).to(xy) + max_ = torch.from_numpy(max_).to(xy) + return (xy - min_) / (max_ - min_) + + def unnormalize(self, xy): + min_, max_ = self.min_, self.max_ + if isinstance(xy, torch.Tensor): + min_ = torch.from_numpy(min_).to(xy) + max_ = torch.from_numpy(max_).to(xy) + return xy * (max_ - min_) + min_ + + def format(self) -> str: + return ",".join(np.r_[self.min_, self.max_].astype(str)) + + def __add__(self, x): + if isinstance(x, (int, float)): + return self.__class__(self.min_ - x, self.max_ + x) + else: + raise TypeError(f"Cannot add {self.__class__.__name__} to {type(x)}.") + + def __and__(self, other): + return self.__class__( + np.maximum(self.min_, other.min_), np.minimum(self.max_, other.max_) + ) + + def __repr__(self): + return self.format() + + +class Projection: + def __init__(self, lat, lon, alt=0, max_extent=25e3): + # The approximation error is |L - radius * tan(L / radius)| + # and is around 13cm for L=25km. + self.latlonalt = (lat, lon, alt) + self.converter = TopocentricConverter(lat, lon, alt) + min_ = self.converter.to_lla(*(-max_extent,) * 2, 0)[:2] + max_ = self.converter.to_lla(*(max_extent,) * 2, 0)[:2] + self.bounds = BoundaryBox(min_, max_) + + @classmethod + def from_points(cls, all_latlon): + assert all_latlon.shape[-1] == 2 + all_latlon = all_latlon.reshape(-1, 2) + latlon_mid = (all_latlon.min(0) + all_latlon.max(0)) / 2 + return cls(*latlon_mid) + + def check_bbox(self, bbox: BoundaryBox): + if self.bounds is not None and not self.bounds.contains(bbox): + raise ValueError( + f"Bbox {bbox.format()} is not contained in " + f"projection with bounds {self.bounds.format()}." + ) + + def project(self, geo, return_z=False): + if isinstance(geo, BoundaryBox): + return BoundaryBox(*self.project(np.stack([geo.min_, geo.max_]))) + geo = np.asarray(geo) + assert geo.shape[-1] in (2, 3) + if self.bounds is not None: + if not np.all(self.bounds.contains(geo[..., :2])): + raise ValueError( + f"Points {geo} are out of the valid bounds " + f"{self.bounds.format()}." 
+ ) + lat, lon = geo[..., 0], geo[..., 1] + if geo.shape[-1] == 3: + alt = geo[..., -1] + else: + alt = np.zeros_like(lat) + x, y, z = self.converter.to_topocentric(lat, lon, alt) + return np.stack([x, y] + ([z] if return_z else []), -1) + + def unproject(self, xy, return_z=False): + if isinstance(xy, BoundaryBox): + return BoundaryBox(*self.unproject(np.stack([xy.min_, xy.max_]))) + xy = np.asarray(xy) + x, y = xy[..., 0], xy[..., 1] + if xy.shape[-1] == 3: + z = xy[..., -1] + else: + z = np.zeros_like(x) + lat, lon, alt = self.converter.to_lla(x, y, z) + return np.stack([lat, lon] + ([alt] if return_z else []), -1) diff --git a/utils/geo_opensfm.py b/utils/geo_opensfm.py new file mode 100644 index 0000000000000000000000000000000000000000..d42145236dd4c65a8764cbe37fa25c527814017e --- /dev/null +++ b/utils/geo_opensfm.py @@ -0,0 +1,180 @@ +"""Copied from opensfm.geo to minimize hard dependencies.""" +import numpy as np +from numpy import ndarray +from typing import Tuple + +WGS84_a = 6378137.0 +WGS84_b = 6356752.314245 + + +def ecef_from_lla(lat, lon, alt: float) -> Tuple[float, ...]: + """ + Compute ECEF XYZ from latitude, longitude and altitude. + + All using the WGS84 model. + Altitude is the distance to the WGS84 ellipsoid. + Check results here http://www.oc.nps.edu/oc2902w/coord/llhxyz.htm + + >>> lat, lon, alt = 10, 20, 30 + >>> x, y, z = ecef_from_lla(lat, lon, alt) + >>> np.allclose(lla_from_ecef(x,y,z), [lat, lon, alt]) + True + """ + a2 = WGS84_a**2 + b2 = WGS84_b**2 + lat = np.radians(lat) + lon = np.radians(lon) + L = 1.0 / np.sqrt(a2 * np.cos(lat) ** 2 + b2 * np.sin(lat) ** 2) + x = (a2 * L + alt) * np.cos(lat) * np.cos(lon) + y = (a2 * L + alt) * np.cos(lat) * np.sin(lon) + z = (b2 * L + alt) * np.sin(lat) + return x, y, z + + +def lla_from_ecef(x, y, z): + """ + Compute latitude, longitude and altitude from ECEF XYZ. + + All using the WGS84 model. + Altitude is the distance to the WGS84 ellipsoid. + """ + a = WGS84_a + b = WGS84_b + ea = np.sqrt((a**2 - b**2) / a**2) + eb = np.sqrt((a**2 - b**2) / b**2) + p = np.sqrt(x**2 + y**2) + theta = np.arctan2(z * a, p * b) + lon = np.arctan2(y, x) + lat = np.arctan2( + z + eb**2 * b * np.sin(theta) ** 3, p - ea**2 * a * np.cos(theta) ** 3 + ) + N = a / np.sqrt(1 - ea**2 * np.sin(lat) ** 2) + alt = p / np.cos(lat) - N + return np.degrees(lat), np.degrees(lon), alt + + +def ecef_from_topocentric_transform(lat, lon, alt: float) -> ndarray: + """ + Transformation from a topocentric frame at reference position to ECEF. + + The topocentric reference frame is a metric one with the origin + at the given (lat, lon, alt) position, with the X axis heading east, + the Y axis heading north and the Z axis vertical to the ellipsoid. + >>> a = ecef_from_topocentric_transform(30, 20, 10) + >>> b = ecef_from_topocentric_transform_finite_diff(30, 20, 10) + >>> np.allclose(a, b) + True + """ + x, y, z = ecef_from_lla(lat, lon, alt) + sa = np.sin(np.radians(lat)) + ca = np.cos(np.radians(lat)) + so = np.sin(np.radians(lon)) + co = np.cos(np.radians(lon)) + return np.array( + [ + [-so, -sa * co, ca * co, x], + [co, -sa * so, ca * so, y], + [0, ca, sa, z], + [0, 0, 0, 1], + ] + ) + + +def ecef_from_topocentric_transform_finite_diff(lat, lon, alt: float) -> ndarray: + """ + Transformation from a topocentric frame at reference position to ECEF. + + The topocentric reference frame is a metric one with the origin + at the given (lat, lon, alt) position, with the X axis heading east, + the Y axis heading north and the Z axis vertical to the ellipsoid. 
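+
+    A quick consistency check in the style of the doctest of
+    ecef_from_topocentric_transform: the translation column of the
+    returned matrix is the ECEF position of the reference point.
+
+    >>> T = ecef_from_topocentric_transform_finite_diff(30, 20, 10)
+    >>> np.allclose(T[:3, 3], ecef_from_lla(30, 20, 10))
+    True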
+ """ + eps = 1e-2 + x, y, z = ecef_from_lla(lat, lon, alt) + v1 = ( + ( + np.array(ecef_from_lla(lat, lon + eps, alt)) + - np.array(ecef_from_lla(lat, lon - eps, alt)) + ) + / 2 + / eps + ) + v2 = ( + ( + np.array(ecef_from_lla(lat + eps, lon, alt)) + - np.array(ecef_from_lla(lat - eps, lon, alt)) + ) + / 2 + / eps + ) + v3 = ( + ( + np.array(ecef_from_lla(lat, lon, alt + eps)) + - np.array(ecef_from_lla(lat, lon, alt - eps)) + ) + / 2 + / eps + ) + v1 /= np.linalg.norm(v1) + v2 /= np.linalg.norm(v2) + v3 /= np.linalg.norm(v3) + return np.array( + [ + [v1[0], v2[0], v3[0], x], + [v1[1], v2[1], v3[1], y], + [v1[2], v2[2], v3[2], z], + [0, 0, 0, 1], + ] + ) + + +def topocentric_from_lla(lat, lon, alt: float, reflat, reflon, refalt: float): + """ + Transform from lat, lon, alt to topocentric XYZ. + + >>> lat, lon, alt = -10, 20, 100 + >>> np.allclose(topocentric_from_lla(lat, lon, alt, lat, lon, alt), + ... [0,0,0]) + True + >>> x, y, z = topocentric_from_lla(lat, lon, alt, 0, 0, 0) + >>> np.allclose(lla_from_topocentric(x, y, z, 0, 0, 0), + ... [lat, lon, alt]) + True + """ + T = np.linalg.inv(ecef_from_topocentric_transform(reflat, reflon, refalt)) + x, y, z = ecef_from_lla(lat, lon, alt) + tx = T[0, 0] * x + T[0, 1] * y + T[0, 2] * z + T[0, 3] + ty = T[1, 0] * x + T[1, 1] * y + T[1, 2] * z + T[1, 3] + tz = T[2, 0] * x + T[2, 1] * y + T[2, 2] * z + T[2, 3] + return tx, ty, tz + + +def lla_from_topocentric(x, y, z, reflat, reflon, refalt: float): + """ + Transform from topocentric XYZ to lat, lon, alt. + """ + T = ecef_from_topocentric_transform(reflat, reflon, refalt) + ex = T[0, 0] * x + T[0, 1] * y + T[0, 2] * z + T[0, 3] + ey = T[1, 0] * x + T[1, 1] * y + T[1, 2] * z + T[1, 3] + ez = T[2, 0] * x + T[2, 1] * y + T[2, 2] * z + T[2, 3] + return lla_from_ecef(ex, ey, ez) + + +class TopocentricConverter(object): + """Convert to and from a topocentric reference frame.""" + + def __init__(self, reflat, reflon, refalt): + """Init the converter given the reference origin.""" + self.lat = reflat + self.lon = reflon + self.alt = refalt + + def to_topocentric(self, lat, lon, alt): + """Convert lat, lon, alt to topocentric x, y, z.""" + return topocentric_from_lla(lat, lon, alt, self.lat, self.lon, self.alt) + + def to_lla(self, x, y, z): + """Convert topocentric x, y, z to lat, lon, alt.""" + return lla_from_topocentric(x, y, z, self.lat, self.lon, self.alt) + + def __eq__(self, o): + return np.allclose([self.lat, self.lon, self.alt], (o.lat, o.lon, o.alt)) diff --git a/utils/geometry.py b/utils/geometry.py new file mode 100644 index 0000000000000000000000000000000000000000..9bcbcba7c41e689e9dd9e35fe33e7787fdd13b03 --- /dev/null +++ b/utils/geometry.py @@ -0,0 +1,68 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. + +import numpy as np +import torch + + +def from_homogeneous(points, eps: float = 1e-8): + """Remove the homogeneous dimension of N-dimensional points. + Args: + points: torch.Tensor or numpy.ndarray with size (..., N+1). + Returns: + A torch.Tensor or numpy ndarray with size (..., N). + """ + return points[..., :-1] / (points[..., -1:] + eps) + + +def to_homogeneous(points): + """Convert N-dimensional points to homogeneous coordinates. + Args: + points: torch.Tensor or numpy.ndarray with size (..., N). + Returns: + A torch.Tensor or numpy.ndarray with size (..., N+1). 
+ """ + if isinstance(points, torch.Tensor): + pad = points.new_ones(points.shape[:-1] + (1,)) + return torch.cat([points, pad], dim=-1) + elif isinstance(points, np.ndarray): + pad = np.ones((points.shape[:-1] + (1,)), dtype=points.dtype) + return np.concatenate([points, pad], axis=-1) + else: + raise ValueError + + +@torch.jit.script +def undistort_points(pts, dist): + dist = dist.unsqueeze(-2) # add point dimension + ndist = dist.shape[-1] + undist = pts + valid = torch.ones(pts.shape[:-1], device=pts.device, dtype=torch.bool) + if ndist > 0: + k1, k2 = dist[..., :2].split(1, -1) + r2 = torch.sum(pts**2, -1, keepdim=True) + radial = k1 * r2 + k2 * r2**2 + undist = undist + pts * radial + + # The distortion model is supposedly only valid within the image + # boundaries. Because of the negative radial distortion, points that + # are far outside of the boundaries might actually be mapped back + # within the image. To account for this, we discard points that are + # beyond the inflection point of the distortion model, + # e.g. such that d(r + k_1 r^3 + k2 r^5)/dr = 0 + limited = ((k2 > 0) & ((9 * k1**2 - 20 * k2) > 0)) | ((k2 <= 0) & (k1 > 0)) + limit = torch.abs( + torch.where( + k2 > 0, + (torch.sqrt(9 * k1**2 - 20 * k2) - 3 * k1) / (10 * k2), + 1 / (3 * k1), + ) + ) + valid = valid & torch.squeeze(~limited | (r2 < limit), -1) + + if ndist > 2: + p12 = dist[..., 2:] + p21 = p12.flip(-1) + uv = torch.prod(pts, -1, keepdim=True) + undist = undist + 2 * p12 * uv + p21 * (r2 + 2 * pts**2) + + return undist, valid diff --git a/utils/io.py b/utils/io.py new file mode 100644 index 0000000000000000000000000000000000000000..e092ef8695a41f2fae6da4e03d7cad74eab5cadf --- /dev/null +++ b/utils/io.py @@ -0,0 +1,61 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. 
+ +import json +import requests +import shutil +from pathlib import Path + +import cv2 +import numpy as np +import torch +from tqdm.auto import tqdm + +import logger + +DATA_URL = "https://cvg-data.inf.ethz.ch/OrienterNet_CVPR2023" + + +def read_image(path, grayscale=False): + if grayscale: + mode = cv2.IMREAD_GRAYSCALE + else: + mode = cv2.IMREAD_COLOR + image = cv2.imread(str(path), mode) + if image is None: + raise ValueError(f"Cannot read image {path}.") + if not grayscale and len(image.shape) == 3: + image = np.ascontiguousarray(image[:, :, ::-1]) # BGR to RGB + return image + + +def write_torch_image(path, image): + image_cv2 = np.round(image.clip(0, 1) * 255).astype(int)[..., ::-1] + cv2.imwrite(str(path), image_cv2) + + +class JSONEncoder(json.JSONEncoder): + def default(self, obj): + if isinstance(obj, (np.ndarray, torch.Tensor)): + return obj.tolist() + elif isinstance(obj, np.generic): + return obj.item() + return json.JSONEncoder.default(self, obj) + + +def write_json(path, data): + with open(path, "w") as f: + json.dump(data, f, cls=JSONEncoder) + + +def download_file(url, path): + path = Path(path) + if path.is_dir(): + path = path / Path(url).name + path.parent.mkdir(exist_ok=True, parents=True) + logger.info("Downloading %s to %s.", url, path) + with requests.get(url, stream=True) as r: + total_length = int(r.headers.get("Content-Length")) + with tqdm.wrapattr(r.raw, "read", total=total_length, desc="") as raw: + with open(path, "wb") as output: + shutil.copyfileobj(raw, output) + return path diff --git a/utils/sensor_data.json b/utils/sensor_data.json new file mode 100644 index 0000000000000000000000000000000000000000..ca6a23922096b17678929ecd0faa23ac43725b37 --- /dev/null +++ b/utils/sensor_data.json @@ -0,0 +1,3711 @@ +{ + "Acer CE-5330": 5.75, + "Acer CE-5430": 5.75, + "Acer CE-6430": 5.75, + "Acer CI-6330": 7.11, + "Acer CI-6530": 7.11, + "Acer CI-8330": 7.11, + "Acer CL-5300": 5.75, + "Acer CP-8531": 7.11, + "Acer CP-8660": 7.11, + "Acer CR-5130": 7.11, + "Acer CR-6530": 7.11, + "Acer CR-8530": 7.11, + "Acer CS-5530": 5.75, + "Acer CS-5531": 5.75, + "Acer CS-6530": 5.75, + "Acer CS-6531": 5.75, + "Acer CU-6530": 5.75, + "AeroVironment Quantix": 6.08, + "AgfaPhoto Compact 100": 5.75, + "AgfaPhoto Compact 102": 6.16, + "AgfaPhoto Compact 103": 6.08, + "AgfaPhoto DC-1030i": 7.11, + "AgfaPhoto DC-1033m": 5.75, + "AgfaPhoto DC-1033x": 7.11, + "AgfaPhoto DC-1338i": 7.11, + "AgfaPhoto DC-1338sT": 7.11, + "AgfaPhoto DC-2030m": 6.16, + "AgfaPhoto DC-302": 4.8, + "AgfaPhoto DC-500": 4.8, + "AgfaPhoto DC-530i": 5.75, + "AgfaPhoto DC-533": 5.75, + "AgfaPhoto DC-600uw": 6.16, + "AgfaPhoto DC-630": 5.75, + "AgfaPhoto DC-630i": 5.75, + "AgfaPhoto DC-630x": 5.75, + "AgfaPhoto DC-633x": 5.75, + "AgfaPhoto DC-633xs": 5.75, + "AgfaPhoto DC-730i": 5.75, + "AgfaPhoto DC-733i": 5.75, + "AgfaPhoto DC-733s": 5.75, + "AgfaPhoto DC-735": 5.75, + "AgfaPhoto DC-735i": 5.75, + "AgfaPhoto DC-738i": 5.75, + "AgfaPhoto DC-830": 5.75, + "AgfaPhoto DC-830i": 7.11, + "AgfaPhoto DC-8330i": 5.75, + "AgfaPhoto DC-8338i": 5.75, + "AgfaPhoto DC-833m": 5.75, + "AgfaPhoto DC-8428s": 7.11, + "AgfaPhoto Optima 1": 6.08, + "AgfaPhoto Optima 100": 6.08, + "AgfaPhoto Optima 102": 6.08, + "AgfaPhoto Optima 103": 6.16, + "AgfaPhoto Optima 104": 6.08, + "AgfaPhoto Optima 105": 6.16, + "AgfaPhoto Optima 1338mT": 6.16, + "AgfaPhoto Optima 1438m": 6.08, + "AgfaPhoto Optima 3": 6.16, + "AgfaPhoto Optima 830UW": 5.75, + "AgfaPhoto Optima 8328m": 5.75, + "AgfaPhoto ePhoto 1280": 6.16, + "AgfaPhoto ePhoto 1680": 6.16, + 
"AgfaPhoto ePhoto CL18": 6.16, + "AgfaPhoto ePhoto CL30": 6.16, + "AgfaPhoto ePhoto CL30 Clik!": 6.16, + "AgfaPhoto ePhoto CL45": 6.16, + "AgfaPhoto ePhoto CL50": 6.16, + "AgfaPhoto sensor 505-D": 5.75, + "AgfaPhoto sensor 505-X": 5.75, + "AgfaPhoto sensor 530s": 5.75, + "AgfaPhoto sensor 830s": 5.75, + "Apple iPhone 4S": 4.57, + "Apple iPhone6,1": 4.89, + "Apple iPad 3": 4.54, + "Apple iPhone 3,1": 4.74, + "Apple iPhone 4": 4.74, + "Apple iPhone 5": 4.57, + "Apple iPhone 5S": 4.89, + "Apple iPhone 6": 4.89, + "Apple iPhone 6 Plus": 4.89, + "Apple iPhone3": 4.74, + "Apple iPhone4": 4.57, + "Apple iPhone3,1": 4.74, + "Apple iPhone4,1": 4.57, + "Apple iPhone4S": 4.57, + "Apple iPhone5,1": 4.57, + "Apple iPhone5,2": 4.57, + "Apple iPhone5,3": 4.57, + "Apple iPhone5,4": 4.57, + "Apple iPhone6": 4.89, + "Apple iPhone6,2": 4.89, + "BenQ AC100": 6.16, + "BenQ AE100": 6.16, + "BenQ C1420": 6.08, + "BenQ DC 2300": 4.5, + "BenQ DC 2410": 5.33, + "BenQ DC 3400": 4.5, + "BenQ DC 3410": 4.5, + "BenQ DC 4330": 5.75, + "BenQ DC 4500": 5.75, + "BenQ DC 5330": 5.75, + "BenQ DC C1000": 7.11, + "BenQ DC C1020": 6.16, + "BenQ DC C1030 Eco": 6.16, + "BenQ DC C1035": 6.16, + "BenQ DC C1050": 7.53, + "BenQ DC C1060": 6.16, + "BenQ DC C1220": 6.16, + "BenQ DC C1230": 6.16, + "BenQ DC C1250": 6.16, + "BenQ DC C1255": 6.16, + "BenQ DC C1430": 6.16, + "BenQ DC C1450": 6.16, + "BenQ DC C1460": 6.08, + "BenQ DC C1480": 6.16, + "BenQ DC C30": 5.75, + "BenQ DC C35": 5.75, + "BenQ DC C40": 5.75, + "BenQ DC C420": 5.75, + "BenQ DC C50": 7.11, + "BenQ DC C500": 5.75, + "BenQ DC C51": 5.75, + "BenQ DC C510": 5.75, + "BenQ DC C520": 5.75, + "BenQ DC C530": 5.75, + "BenQ DC C540": 5.75, + "BenQ DC C60": 7.11, + "BenQ DC C610": 5.75, + "BenQ DC C62": 7.11, + "BenQ DC C630": 5.75, + "BenQ DC C640": 7.53, + "BenQ DC C740": 5.75, + "BenQ DC C740i": 5.75, + "BenQ DC C750": 5.75, + "BenQ DC C800": 7.11, + "BenQ DC C840": 5.75, + "BenQ DC C850": 5.75, + "BenQ DC E1000": 7.11, + "BenQ DC E1020": 6.16, + "BenQ DC E1030": 6.16, + "BenQ DC E1035": 6.16, + "BenQ DC E1050": 7.53, + "BenQ DC E1050t": 6.16, + "BenQ DC E1220": 6.16, + "BenQ DC E1230": 6.16, + "BenQ DC E1240": 6.16, + "BenQ DC E1250": 6.16, + "BenQ DC E1260": 6.16, + "BenQ DC E1280": 6.16, + "BenQ DC E1420": 6.16, + "BenQ DC E1430": 6.16, + "BenQ DC E1460": 6.16, + "BenQ DC E1465": 6.16, + "BenQ DC E30": 6.4, + "BenQ DC E300": 6.4, + "BenQ DC E310": 6.4, + "BenQ DC E40": 5.75, + "BenQ DC E41": 5.75, + "BenQ DC E43": 5.75, + "BenQ DC E510": 5.75, + "BenQ DC E520": 5.75, + "BenQ DC E520 plus": 5.75, + "BenQ DC E53": 5.75, + "BenQ DC E600": 5.75, + "BenQ DC E605": 5.75, + "BenQ DC E610": 5.75, + "BenQ DC E63 Plus": 5.75, + "BenQ DC E720": 5.75, + "BenQ DC E800": 5.75, + "BenQ DC E820": 5.75, + "BenQ DC L1020": 6.16, + "BenQ DC P1410": 6.16, + "BenQ DC P500": 5.75, + "BenQ DC P860": 7.11, + "BenQ DC S1430": 6.08, + "BenQ DC S30": 5.33, + "BenQ DC S40": 5.75, + "BenQ DC T1260": 6.16, + "BenQ DC T700": 5.75, + "BenQ DC T800": 6.03, + "BenQ DC T850": 5.75, + "BenQ DC W1220": 6.16, + "BenQ DC X600": 5.75, + "BenQ DC X710": 5.75, + "BenQ DC X720": 5.75, + "BenQ DC X725": 5.75, + "BenQ DC X735": 5.75, + "BenQ DC X800": 5.75, + "BenQ DC X835": 5.75, + "BenQ E1480": 6.16, + "BenQ G1": 6.16, + "BenQ GH200": 6.16, + "BenQ GH600": 6.16, + "BenQ GH700": 6.16, + "BenQ LM100": 6.16, + "BenQ S1410": 6.08, + "BenQ S1420": 6.16, + "BenQ T1460": 6.16, + "Canon Digital IXUS": 5.33, + "Canon Digital IXUS 100 IS": 6.16, + "Canon Digital IXUS 110 IS": 6.16, + "Canon Digital IXUS 200 IS": 6.16, + 
"Canon Digital IXUS 300": 5.33, + "Canon Digital IXUS 330": 5.33, + "Canon Digital IXUS 40": 5.75, + "Canon Digital IXUS 400": 7.11, + "Canon Digital IXUS 430": 7.11, + "Canon Digital IXUS 50": 5.75, + "Canon Digital IXUS 500": 7.11, + "Canon Digital IXUS 60": 5.75, + "Canon Digital IXUS 65": 5.75, + "Canon Digital IXUS 80 IS": 5.75, + "Canon Digital IXUS 800 IS": 5.75, + "Canon Digital IXUS 85 IS": 6.16, + "Canon Digital IXUS 850 IS": 5.75, + "Canon Digital IXUS 860 IS": 5.75, + "Canon Digital IXUS 870 IS": 6.16, + "Canon Digital IXUS 90 IS": 6.16, + "Canon Digital IXUS 900 Ti": 7.11, + "Canon Digital IXUS 95 IS": 6.16, + "Canon Digital IXUS 950 IS": 5.75, + "Canon Digital IXUS 960 IS": 7.53, + "Canon Digital IXUS 970 IS": 6.16, + "Canon Digital IXUS 980 IS": 7.53, + "Canon Digital IXUS 990 IS": 6.16, + "Canon Digital IXUS II": 5.33, + "Canon Digital IXUS IIs": 5.33, + "Canon Digital IXUS V": 5.33, + "Canon Digital IXUS i": 5.75, + "Canon Digital IXUS i Zoom": 5.75, + "Canon Digital IXUS i7": 5.75, + "Canon Digital IXUS v2": 5.33, + "Canon Digital IXUS v3": 5.33, + "Canon ELPH 135 / IXUS 145": 6.16, + "Canon ELPH 140 IS / IXUS 150": 6.16, + "Canon ELPH 150 IS / IXUS 155": 6.16, + "Canon EOS 1000D": 22.2, + "Canon EOS 10D": 22.7, + "Canon EOS 20D": 22.5, + "Canon EOS 20Da": 22.5, + "Canon EOS 300D": 22.7, + "Canon EOS 30D": 22.5, + "Canon EOS 350D": 22.2, + "Canon EOS 400D": 22.2, + "Canon EOS 40D": 22.2, + "Canon EOS 450D": 22.2, + "Canon EOS 500D": 22.3, + "Canon EOS 50D": 22.3, + "Canon EOS 5D": 35.8, + "Canon EOS 5D Mark II": 36.0, + "Canon EOS 5D Mark III": 36.0, + "Canon EOS 60D": 22.3, + "Canon EOS 60Da": 22.3, + "Canon EOS 6D": 35.8, + "Canon EOS 70D": 22.5, + "Canon EOS 7D": 22.3, + "Canon EOS 7D Mark II": 22.4, + "Canon EOS DIGITAL REBEL XSi": 22.2, + "Canon EOS D30": 22.7, + "Canon EOS D60": 22.7, + "Canon EOS M": 22.3, + "Canon EOS Rebel SL1 / 100D": 22.3, + "Canon EOS Rebel T2i / 550D": 22.3, + "Canon EOS Rebel T3 / 1100D": 22.2, + "Canon EOS Rebel T3i / 600D": 22.3, + "Canon EOS Rebel T4i / 650D": 22.3, + "Canon EOS Rebel T5 / 1200D": 22.3, + "Canon EOS Rebel T5i / 700D": 22.3, + "Canon EOS-1D": 28.7, + "Canon EOS-1D C": 36.0, + "Canon EOS-1D Mark II": 28.7, + "Canon EOS-1D Mark II N": 28.7, + "Canon EOS-1D Mark III": 28.7, + "Canon EOS-1D Mark IV": 27.9, + "Canon EOS-1D X": 36.0, + "Canon EOS-1Ds": 35.8, + "Canon EOS-1Ds Mark II": 36.0, + "Canon EOS-1Ds Mark III": 36.0, + "Canon IXUS 1000 HS": 6.16, + "Canon IXUS 105": 6.16, + "Canon IXUS 1100 HS": 6.16, + "Canon IXUS 115 HS": 6.16, + "Canon IXUS 125 HS": 6.16, + "Canon IXUS 130": 6.16, + "Canon IXUS 132": 6.16, + "Canon IXUS 210": 6.16, + "Canon IXUS 220 HS": 6.16, + "Canon IXUS 230 HS": 6.16, + "Canon IXUS 240 HS": 6.16, + "Canon IXUS 300 HS": 6.16, + "Canon IXUS 310 HS": 6.16, + "Canon IXUS 500 HS": 6.16, + "Canon IXUS 510 HS": 6.16, + "Canon PowerShot 350": 4.8, + "Canon PowerShot 600": 4.8, + "Canon PowerShot A1000 IS": 6.16, + "Canon PowerShot A1300": 6.16, + "Canon PowerShot A1400": 6.16, + "Canon PowerShot A2000 IS": 6.16, + "Canon PowerShot A2300": 6.16, + "Canon PowerShot A2400 IS": 6.16, + "Canon PowerShot A2500": 6.16, + "Canon PowerShot A2600": 6.16, + "Canon PowerShot A3400 IS": 6.16, + "Canon PowerShot A3500 IS": 6.16, + "Canon PowerShot A400": 4.5, + "Canon PowerShot A4000 IS": 6.16, + "Canon PowerShot A430": 4.8, + "Canon PowerShot A450": 4.8, + "Canon PowerShot A460": 4.8, + "Canon PowerShot A470": 5.75, + "Canon PowerShot A480": 6.16, + "Canon PowerShot A490": 6.16, + "Canon PowerShot A495": 6.16, + "Canon 
PowerShot A5": 4.8, + "Canon PowerShot A5 Zoom": 4.8, + "Canon PowerShot A50": 4.8, + "Canon PowerShot A530": 5.75, + "Canon PowerShot A550": 5.75, + "Canon PowerShot A560": 5.75, + "Canon PowerShot A570 IS": 5.75, + "Canon PowerShot A580": 5.75, + "Canon PowerShot A590 IS": 5.75, + "Canon PowerShot A620": 7.11, + "Canon PowerShot A630": 7.11, + "Canon PowerShot A640": 7.11, + "Canon PowerShot A650 IS": 7.53, + "Canon PowerShot A700": 5.75, + "Canon PowerShot A710 IS": 5.75, + "Canon PowerShot A720 IS": 5.75, + "Canon PowerShot A800": 6.16, + "Canon PowerShot A810": 6.16, + "Canon PowerShot A85": 5.33, + "Canon PowerShot A95": 7.11, + "Canon PowerShot D10": 6.16, + "Canon PowerShot D20": 6.16, + "Canon PowerShot D30": 6.16, + "Canon PowerShot E1": 6.16, + "Canon PowerShot ELPH 100 HS": 6.16, + "Canon PowerShot ELPH 110 HS": 6.16, + "Canon PowerShot ELPH 115 IS": 6.16, + "Canon PowerShot ELPH 130 IS": 6.16, + "Canon PowerShot ELPH 300 HS": 6.16, + "Canon PowerShot ELPH 310 HS": 6.16, + "Canon PowerShot ELPH 320 HS": 6.16, + "Canon PowerShot ELPH 330 HS": 6.16, + "Canon PowerShot ELPH 340 HS": 6.16, + "Canon PowerShot ELPH 500 HS": 6.16, + "Canon PowerShot ELPH 510 HS": 6.16, + "Canon PowerShot ELPH 520 HS": 6.16, + "Canon PowerShot Elph 530 HS": 6.16, + "Canon PowerShot G1 X": 18.7, + "Canon PowerShot G1 X Mark II": 18.7, + "Canon PowerShot G10": 7.53, + "Canon PowerShot G11": 7.53, + "Canon PowerShot G12": 7.53, + "Canon PowerShot G15": 7.53, + "Canon PowerShot G16": 7.53, + "Canon PowerShot G6": 7.11, + "Canon PowerShot G7": 7.11, + "Canon PowerShot G7 X": 13.2, + "Canon PowerShot G9": 7.53, + "Canon PowerShot G9 X": 13.2, + "Canon PowerShot N": 6.16, + "Canon PowerShot N100": 7.53, + "Canon PowerShot N2": 6.16, + "Canon PowerShot Pro1": 8.8, + "Canon PowerShot Pro70": 6.4, + "Canon PowerShot Pro90 IS": 7.11, + "Canon PowerShot S100": 7.53, + "Canon PowerShot S100 Digital IXUS": 5.33, + "Canon PowerShot S110": 7.53, + "Canon PowerShot S120": 7.53, + "Canon PowerShot S200": 5.33, + "Canon PowerShot S230": 5.33, + "Canon PowerShot S3 IS": 5.75, + "Canon PowerShot S300": 5.33, + "Canon PowerShot S330": 5.33, + "Canon PowerShot S400": 7.11, + "Canon PowerShot S410": 7.11, + "Canon PowerShot S5 IS": 5.75, + "Canon PowerShot S500": 7.11, + "Canon PowerShot S60": 7.11, + "Canon PowerShot S70": 7.11, + "Canon PowerShot S90": 7.53, + "Canon PowerShot S95": 7.53, + "Canon PowerShot SD10": 5.75, + "Canon PowerShot SD100": 5.33, + "Canon PowerShot SD1000": 5.75, + "Canon PowerShot SD110": 5.33, + "Canon PowerShot SD1100 IS": 5.75, + "Canon PowerShot SD1200 IS": 6.16, + "Canon PowerShot SD1300 IS": 6.16, + "Canon PowerShot SD1400 IS": 6.16, + "Canon PowerShot SD20": 5.75, + "Canon PowerShot SD200": 5.75, + "Canon PowerShot SD30": 5.75, + "Canon PowerShot SD300": 5.75, + "Canon PowerShot SD3500 IS": 6.16, + "Canon PowerShot SD40": 5.75, + "Canon PowerShot SD400": 5.75, + "Canon PowerShot SD4000 IS": 6.16, + "Canon PowerShot SD430 Wireless": 5.75, + "Canon PowerShot SD450": 5.75, + "Canon PowerShot SD4500 IS": 6.16, + "Canon PowerShot SD500": 7.11, + "Canon PowerShot SD550": 7.11, + "Canon PowerShot SD600": 5.75, + "Canon PowerShot SD630": 5.75, + "Canon PowerShot SD700 IS": 5.75, + "Canon PowerShot SD750": 5.75, + "Canon PowerShot SD770 IS": 6.16, + "Canon PowerShot SD780 IS": 6.16, + "Canon PowerShot SD790 IS": 6.16, + "Canon PowerShot SD800 IS": 5.75, + "Canon PowerShot SD850 IS": 5.75, + "Canon PowerShot SD870 IS": 5.75, + "Canon PowerShot SD880 IS": 6.16, + "Canon PowerShot SD890 IS": 6.16, + 
"Canon PowerShot SD900": 7.11, + "Canon PowerShot SD940 IS": 6.16, + "Canon PowerShot SD950 IS": 7.53, + "Canon PowerShot SD960 IS": 6.16, + "Canon PowerShot SD970 IS": 6.16, + "Canon PowerShot SD980 IS": 6.16, + "Canon PowerShot SD990 IS": 7.53, + "Canon PowerShot SX1 IS": 6.16, + "Canon PowerShot SX10 IS": 6.16, + "Canon PowerShot SX100 IS": 5.75, + "Canon PowerShot SX110 IS": 6.16, + "Canon PowerShot SX120 IS": 5.75, + "Canon PowerShot SX130 IS": 6.16, + "Canon PowerShot SX150 IS": 6.16, + "Canon PowerShot SX160 IS": 6.16, + "Canon PowerShot SX170 IS": 6.16, + "Canon PowerShot SX20 IS": 6.16, + "Canon PowerShot SX200 IS": 6.16, + "Canon PowerShot SX210 IS": 6.16, + "Canon PowerShot SX220 HS": 6.16, + "Canon PowerShot SX230 HS": 6.16, + "Canon PowerShot SX240 HS": 6.16, + "Canon PowerShot SX260 HS": 6.16, + "Canon PowerShot SX270 HS": 6.16, + "Canon PowerShot SX280 HS": 6.16, + "Canon PowerShot SX30 IS": 6.16, + "Canon PowerShot SX40 HS": 6.16, + "Canon PowerShot SX400 IS": 6.16, + "Canon PowerShot SX50 HS": 6.16, + "Canon PowerShot SX500 IS": 6.16, + "Canon PowerShot SX510 HS": 6.16, + "Canon PowerShot SX520 HS": 6.16, + "Canon PowerShot SX60 HS": 6.16, + "Canon PowerShot SX600 HS": 6.16, + "Canon PowerShot SX700 HS": 6.16, + "Canon PowerShot TX1": 5.75, + "Canon Powershot A10": 5.33, + "Canon Powershot A100": 4.5, + "Canon Powershot A1100 IS": 6.16, + "Canon Powershot A1200": 6.16, + "Canon Powershot A20": 5.33, + "Canon Powershot A200": 4.5, + "Canon Powershot A2100 IS": 6.16, + "Canon Powershot A2200": 6.16, + "Canon Powershot A30": 5.33, + "Canon Powershot A300": 5.33, + "Canon Powershot A3000 IS": 6.16, + "Canon Powershot A310": 5.33, + "Canon Powershot A3100 IS": 6.16, + "Canon Powershot A3200 IS": 6.16, + "Canon Powershot A3300 IS": 6.16, + "Canon Powershot A40": 5.33, + "Canon Powershot A410": 4.5, + "Canon Powershot A420": 4.8, + "Canon Powershot A510": 5.33, + "Canon Powershot A520": 5.75, + "Canon Powershot A540": 5.75, + "Canon Powershot A60": 5.33, + "Canon Powershot A610": 7.11, + "Canon Powershot A70": 5.33, + "Canon Powershot A75": 5.33, + "Canon Powershot A80": 7.11, + "Canon Powershot G1": 7.11, + "Canon Powershot G2": 7.11, + "Canon Powershot G3": 7.11, + "Canon Powershot G5": 7.11, + "Canon Powershot S1 IS": 5.33, + "Canon Powershot S10": 6.4, + "Canon Powershot S2 IS": 5.75, + "Canon Powershot S20": 7.11, + "Canon Powershot S30": 7.11, + "Canon Powershot S40": 7.11, + "Canon Powershot S45": 7.11, + "Canon Powershot S50": 7.11, + "Canon Powershot S80": 7.11, + "Canon Pro90 IS": 7.11, + "Canon S200": 7.53, + "Canon SX220 HS": 6.16, + "Casio EX-FR10": 6.16, + "Casio EX-N1": 6.16, + "Casio EX-N10": 6.16, + "Casio EX-N20": 6.16, + "Casio EX-N5": 6.16, + "Casio EX-N50": 6.16, + "Casio EX-TR10": 6.16, + "Casio EX-TR15": 6.16, + "Casio EX-ZR400": 6.16, + "Casio EX-ZR700": 6.16, + "Casio EX-ZR800": 6.16, + "Casio EX-ZS30": 6.16, + "Casio EXILIM EX-JE10": 6.16, + "Casio EXILIM EX-FC100": 6.16, + "Casio EXILIM EX-FC150": 6.16, + "Casio EXILIM EX-FC160s": 6.16, + "Casio EXILIM EX-FH100": 6.16, + "Casio EXILIM EX-FH150": 6.16, + "Casio EXILIM EX-FH20": 6.16, + "Casio EXILIM EX-FH25": 6.16, + "Casio EXILIM EX-FS10": 6.16, + "Casio EXILIM EX-G1": 6.16, + "Casio EXILIM EX-H10": 6.16, + "Casio EXILIM EX-H15": 6.16, + "Casio EXILIM EX-H20G": 6.16, + "Casio EXILIM EX-H30": 6.16, + "Casio EXILIM EX-H5": 6.16, + "Casio EXILIM EX-H50": 6.16, + "Casio EXILIM EX-M1": 5.33, + "Casio EXILIM EX-M2": 7.11, + "Casio EXILIM EX-M20": 5.33, + "Casio EXILIM EX-S1": 5.33, + "Casio EXILIM EX-S10": 
6.16, + "Casio EXILIM EX-S12": 6.16, + "Casio EXILIM EX-S2": 7.11, + "Casio EXILIM EX-S20": 5.33, + "Casio EXILIM EX-S200": 6.16, + "Casio EXILIM EX-S3": 7.11, + "Casio EXILIM EX-S5": 6.16, + "Casio EXILIM EX-S600D": 5.75, + "Casio EXILIM EX-S7": 6.16, + "Casio EXILIM EX-S770": 5.75, + "Casio EXILIM EX-S770D": 5.75, + "Casio EXILIM EX-S8": 6.08, + "Casio EXILIM EX-S880": 5.75, + "Casio EXILIM EX-TR100": 6.16, + "Casio EXILIM EX-TR150": 6.16, + "Casio EXILIM EX-V7": 5.75, + "Casio EXILIM EX-V8": 5.75, + "Casio EXILIM EX-Z1": 6.16, + "Casio EXILIM EX-Z10": 5.75, + "Casio EXILIM EX-Z100": 6.16, + "Casio EXILIM EX-Z1000": 7.11, + "Casio EXILIM EX-Z1050": 7.31, + "Casio EXILIM EX-Z1080": 7.31, + "Casio EXILIM EX-Z110": 5.75, + "Casio EXILIM EX-Z120": 7.11, + "Casio EXILIM EX-Z150": 5.75, + "Casio EXILIM EX-Z16": 6.16, + "Casio EXILIM EX-Z19": 5.75, + "Casio EXILIM EX-Z2": 6.16, + "Casio EXILIM EX-Z20": 5.75, + "Casio EXILIM EX-Z200": 6.16, + "Casio EXILIM EX-Z2000": 6.16, + "Casio EXILIM EX-Z2300": 6.16, + "Casio EXILIM EX-Z25": 6.16, + "Casio EXILIM EX-Z250": 5.75, + "Casio EXILIM EX-Z270": 5.75, + "Casio EXILIM EX-Z280": 5.75, + "Casio EXILIM EX-Z29": 5.75, + "Casio EXILIM EX-Z3": 5.75, + "Casio EXILIM EX-Z30": 5.75, + "Casio EXILIM EX-Z300": 6.16, + "Casio EXILIM EX-Z3000": 6.16, + "Casio EXILIM EX-Z33": 6.16, + "Casio EXILIM EX-Z330": 6.16, + "Casio EXILIM EX-Z35": 6.16, + "Casio EXILIM EX-Z350": 6.16, + "Casio EXILIM EX-Z4": 5.75, + "Casio EXILIM EX-Z40": 5.75, + "Casio EXILIM EX-Z400": 6.16, + "Casio EXILIM EX-Z450": 6.16, + "Casio EXILIM EX-Z5": 5.75, + "Casio EXILIM EX-Z50": 5.75, + "Casio EXILIM EX-Z55": 5.75, + "Casio EXILIM EX-Z550": 6.16, + "Casio EXILIM EX-Z6": 5.75, + "Casio EXILIM EX-Z60": 7.11, + "Casio EXILIM EX-Z600": 5.75, + "Casio EXILIM EX-Z65": 5.75, + "Casio EXILIM EX-Z7": 5.75, + "Casio EXILIM EX-Z70": 5.75, + "Casio EXILIM EX-Z700": 5.75, + "Casio EXILIM EX-Z75": 5.75, + "Casio EXILIM EX-Z77": 5.75, + "Casio EXILIM EX-Z8": 5.75, + "Casio EXILIM EX-Z80": 5.75, + "Casio EXILIM EX-Z800": 6.16, + "Casio EXILIM EX-Z85": 5.75, + "Casio EXILIM EX-Z850": 7.11, + "Casio EXILIM EX-Z9": 5.75, + "Casio EXILIM EX-Z90": 6.16, + "Casio EXILIM EX-ZR10": 6.16, + "Casio EXILIM EX-ZR100": 6.16, + "Casio EXILIM EX-ZR15": 6.16, + "Casio EXILIM EX-ZR20": 6.16, + "Casio EXILIM EX-ZR200": 6.16, + "Casio EXILIM EX-ZR300": 6.16, + "Casio EXILIM EX-ZS10": 6.16, + "Casio EXILIM EX-ZS100": 6.16, + "Casio EXILIM EX-ZS12": 6.16, + "Casio EXILIM EX-ZS15": 6.16, + "Casio EXILIM EX-ZS150": 6.16, + "Casio EXILIM EX-ZS20": 6.16, + "Casio EXILIM EX-ZS6": 6.16, + "Casio EXILIM Pro EX-F1": 7.11, + "Casio EXILIM QV-R100": 6.16, + "Casio Exilim EX-10": 7.53, + "Casio Exilim EX-100": 7.53, + "Casio Exilim EX-P505": 5.75, + "Casio Exilim EX-P600": 7.11, + "Casio Exilim EX-P700": 7.11, + "Casio Exilim EX-S100": 4.5, + "Casio Exilim EX-S500": 5.75, + "Casio Exilim EX-S600": 5.75, + "Casio Exilim EX-Z1200 SR": 7.53, + "Casio Exilim EX-Z500": 5.75, + "Casio Exilim EX-Z57": 5.75, + "Casio Exilim EX-Z750": 7.11, + "Casio Exilim EX-ZR1000": 6.16, + "Casio Exilim EX-ZR1100": 6.16, + "Casio Exilim EX-ZS5": 6.16, + "Casio Exilim TRYX": 6.16, + "Casio GV-10": 4.5, + "Casio GV-20": 4.5, + "Casio QV-2000UX": 6.4, + "Casio QV-2100": 5.33, + "Casio QV-2300UX": 5.33, + "Casio QV-2400UX": 5.33, + "Casio QV-2800UX": 5.33, + "Casio QV-2900UX": 5.33, + "Casio QV-300": 4.8, + "Casio QV-3000EX": 7.11, + "Casio QV-3500EX": 7.11, + "Casio QV-3EX / XV-3": 7.11, + "Casio QV-4000": 7.11, + "Casio QV-5000SX": 4.8, + "Casio QV-5500SX": 4.8, 
+ "Casio QV-5700": 7.11, + "Casio QV-700": 4.8, + "Casio QV-7000SX": 4.8, + "Casio QV-770": 4.8, + "Casio QV-8000SX": 4.8, + "Casio QV-R3": 7.11, + "Casio QV-R4": 7.11, + "Casio QV-R40": 7.11, + "Casio QV-R41": 7.11, + "Casio QV-R51": 7.11, + "Casio QV-R52": 7.11, + "Casio QV-R61": 7.11, + "Casio QV-R62": 7.11, + "Concord 00.": 6.4, + "Concord 2.": 6.4, + "Concord 3345z": 6.4, + "Concord 3346z": 6.4, + "Concord 40.": 7.11, + "Concord 42.": 5.75, + "Concord 43.": 6.4, + "Concord 4340z": 5.75, + "Concord 45.": 6.4, + "Concord 46.": 6.4, + "Concord 47.": 6.4, + "Concord 5340z": 5.75, + "Concord 5345z": 7.11, + "Concord 6340z": 7.11, + "Concord DVx": 6.4, + "Concord ES500z": 5.75, + "Concord ES510z": 7.11, + "Concord Eye-Q 1000": 6.4, + "Concord Eye-Q 1300": 6.4, + "Concord Eye-Q 2040": 6.4, + "Concord Eye-Q 2133z": 6.4, + "Concord Eye-Q 3040AF": 6.4, + "Concord Eye-Q 3103": 6.4, + "Concord Eye-Q 3120 AF": 4.0, + "Concord Eye-Q 3132z": 6.4, + "Concord Eye-Q 3340z": 5.33, + "Concord Eye-Q 3341z": 7.11, + "Concord Eye-Q 3343z": 5.33, + "Concord Eye-Q 4060AF": 7.11, + "Concord Eye-Q 4330z": 7.11, + "Concord Eye-Q 4342z": 7.11, + "Concord Eye-Q 4360z": 7.11, + "Concord Eye-Q 4363z": 7.11, + "Concord Eye-Q 5062AF": 7.11, + "Concord Eye-Q 5330z": 7.11, + "Concord Eye-Q Duo 2000": 7.11, + "Concord Eye-Q Duo LCD": 6.4, + "Concord Eye-Q Go 2000": 7.11, + "Concord Eye-Q Go LCD": 6.4, + "Concord Eye-Q Go Wireless": 7.11, + "Contax N Digital": 36.0, + "Contax SL300R T": 5.33, + "Contax TVS Digital": 7.11, + "Contax U4R": 5.33, + "Contax i4R": 5.33, + "DJI FC300S": 6.16, + "DJI FC300C": 6.31, + "DJI FC300X": 6.2, + "DJI FC350": 6.17, + "DJI FC350Z": 6.17, + "DJI FC330": 6.25, + "DJI FC220": 6.17, + "DJI FC2103": 6.07, + "DJI FC6310": 13.2, + "DJI FC6540": 23.5, + "Epson L-500V": 5.75, + "Epson PhotoPC 3000 Zoom": 7.11, + "Epson PhotoPC 3100 Zoom": 7.11, + "Epson PhotoPC 500": 4.8, + "Epson PhotoPC 550": 4.8, + "Epson PhotoPC 600": 4.8, + "Epson PhotoPC 650": 4.8, + "Epson PhotoPC 700": 4.8, + "Epson PhotoPC 750 Zoom": 6.4, + "Epson PhotoPC 800": 6.4, + "Epson PhotoPC 850 Zoom": 6.4, + "Epson PhotoPC L-200": 5.75, + "Epson PhotoPC L-300": 5.75, + "Epson PhotoPC L-400": 5.75, + "Epson PhotoPC L-410": 5.75, + "Epson PhotoPC L-500V": 5.75, + "Epson R-D1": 23.7, + "Epson R-D1xG": 23.7, + "Epson RD-1s": 23.7, + "Fujifilm A850": 5.75, + "Fujifilm Bigjob HD-3W": 5.75, + "Fujifilm Bigjob HD1": 5.33, + "Fujifilm DS-260HD": 6.4, + "Fujifilm DS-300": 8.8, + "Fujifilm Digital Q1": 6.4, + "Fujifilm FinePix 1300": 5.33, + "Fujifilm FinePix 1400z": 5.33, + "Fujifilm FinePix 2300": 5.33, + "Fujifilm FinePix 2400 Zoom": 5.33, + "Fujifilm FinePix 2600 Zoom": 5.33, + "Fujifilm FinePix 2650": 5.33, + "Fujifilm FinePix 2800 Zoom": 5.33, + "Fujifilm FinePix 3800": 5.33, + "Fujifilm FinePix 40i": 5.33, + "Fujifilm FinePix 4700 Zoom": 7.53, + "Fujifilm FinePix 4800 Zoom": 7.53, + "Fujifilm FinePix 4900 Zoom": 7.53, + "Fujifilm FinePix 50i": 7.53, + "Fujifilm FinePix 6800 Zoom": 7.53, + "Fujifilm FinePix 6900 Zoom": 7.53, + "Fujifilm FinePix A100": 6.16, + "Fujifilm FinePix A101": 5.33, + "Fujifilm FinePix A120": 5.33, + "Fujifilm FinePix A150": 6.16, + "Fujifilm FinePix A160": 6.16, + "Fujifilm FinePix A170": 6.16, + "Fujifilm FinePix A175": 6.16, + "Fujifilm FinePix A180": 6.16, + "Fujifilm FinePix A200": 5.33, + "Fujifilm FinePix A201": 5.33, + "Fujifilm FinePix A202": 5.33, + "Fujifilm FinePix A203": 5.33, + "Fujifilm FinePix A204": 5.33, + "Fujifilm FinePix A205 Zoom": 5.33, + "Fujifilm FinePix A210 Zoom": 5.33, + "Fujifilm 
FinePix A220": 6.16, + "Fujifilm FinePix A225": 6.16, + "Fujifilm FinePix A235": 6.16, + "Fujifilm FinePix A303": 5.33, + "Fujifilm FinePix A310 Zoom": 5.33, + "Fujifilm FinePix A330": 5.33, + "Fujifilm FinePix A340": 5.33, + "Fujifilm FinePix A345 Zoom": 5.75, + "Fujifilm FinePix A350 Zoom": 5.75, + "Fujifilm FinePix A400 Zoom": 5.75, + "Fujifilm FinePix A500 Zoom": 5.75, + "Fujifilm FinePix A510": 5.75, + "Fujifilm FinePix A600 Zoom": 7.53, + "Fujifilm FinePix A610": 5.75, + "Fujifilm FinePix A700": 8.0, + "Fujifilm FinePix A800": 8.0, + "Fujifilm FinePix A820": 8.0, + "Fujifilm FinePix A825": 8.0, + "Fujifilm FinePix A900": 8.0, + "Fujifilm FinePix A920": 8.0, + "Fujifilm FinePix AV100": 6.16, + "Fujifilm FinePix AV105": 6.16, + "Fujifilm FinePix AV110": 6.16, + "Fujifilm FinePix AV130": 6.16, + "Fujifilm FinePix AV140": 6.16, + "Fujifilm FinePix AV150": 6.16, + "Fujifilm FinePix AV180": 6.16, + "Fujifilm FinePix AV200": 6.16, + "Fujifilm FinePix AV205": 6.16, + "Fujifilm FinePix AV250": 6.16, + "Fujifilm FinePix AV255": 6.16, + "Fujifilm FinePix AX200": 6.16, + "Fujifilm FinePix AX205": 6.16, + "Fujifilm FinePix AX230": 6.16, + "Fujifilm FinePix AX245w": 6.16, + "Fujifilm FinePix AX250": 6.16, + "Fujifilm FinePix AX280": 6.16, + "Fujifilm FinePix AX300": 6.16, + "Fujifilm FinePix AX305": 6.16, + "Fujifilm FinePix AX350": 6.16, + "Fujifilm FinePix AX355": 6.16, + "Fujifilm FinePix AX500": 6.16, + "Fujifilm FinePix AX550": 6.16, + "Fujifilm FinePix AX650": 6.16, + "Fujifilm FinePix E500 Zoom": 5.75, + "Fujifilm FinePix E510 Zoom": 5.75, + "Fujifilm FinePix E550 Zoom": 7.53, + "Fujifilm FinePix E900 Zoom": 8.0, + "Fujifilm FinePix EX-20": 5.33, + "Fujifilm FinePix F10 Zoom": 7.53, + "Fujifilm FinePix F100fd": 8.0, + "Fujifilm FinePix F11 Zoom": 7.53, + "Fujifilm FinePix F20 Zoom": 7.53, + "Fujifilm FinePix F200EXR": 8.0, + "Fujifilm FinePix F30 Zoom": 7.53, + "Fujifilm FinePix F300EXR": 6.4, + "Fujifilm FinePix F305EXR": 6.4, + "Fujifilm FinePix F31fd": 7.53, + "Fujifilm FinePix F401 Zoom": 5.33, + "Fujifilm FinePix F402": 5.33, + "Fujifilm FinePix F40fd": 8.0, + "Fujifilm FinePix F410 Zoom": 5.33, + "Fujifilm FinePix F420 Zoom": 5.33, + "Fujifilm FinePix F440 Zoom": 5.75, + "Fujifilm FinePix F450 Zoom": 5.75, + "Fujifilm FinePix F455 Zoom": 5.75, + "Fujifilm FinePix F45fd": 8.0, + "Fujifilm FinePix F460": 5.75, + "Fujifilm FinePix F470 Zoom": 5.75, + "Fujifilm FinePix F47fd": 8.0, + "Fujifilm FinePix F480 Zoom": 5.75, + "Fujifilm FinePix F500 EXR": 6.4, + "Fujifilm FinePix F50fd": 8.0, + "Fujifilm FinePix F550 EXR": 6.4, + "Fujifilm FinePix F600 EXR": 6.4, + "Fujifilm FinePix F601 Zoom": 7.53, + "Fujifilm FinePix F605EXR": 6.4, + "Fujifilm FinePix F60fd": 8.0, + "Fujifilm FinePix F610": 7.53, + "Fujifilm FinePix F650 Zoom": 5.75, + "Fujifilm FinePix F660EXR": 6.4, + "Fujifilm FinePix F700": 7.53, + "Fujifilm FinePix F70EXR": 6.4, + "Fujifilm FinePix F710": 7.53, + "Fujifilm FinePix F72EXR": 6.4, + "Fujifilm FinePix F750EXR": 6.4, + "Fujifilm FinePix F75EXR": 6.4, + "Fujifilm FinePix F770EXR": 6.4, + "Fujifilm FinePix F800EXR": 6.4, + "Fujifilm FinePix F80EXR": 6.4, + "Fujifilm FinePix F810 Zoom": 7.53, + "Fujifilm FinePix F850EXR": 6.4, + "Fujifilm FinePix F85EXR": 6.4, + "Fujifilm FinePix F900EXR": 6.4, + "Fujifilm FinePix HS10": 6.16, + "Fujifilm FinePix HS11": 6.16, + "Fujifilm FinePix HS20 EXR": 6.4, + "Fujifilm FinePix HS22 EXR": 6.4, + "Fujifilm FinePix HS25 EXR": 6.4, + "Fujifilm FinePix HS30 EXR": 6.4, + "Fujifilm FinePix HS35 EXR": 6.4, + "Fujifilm FinePix HS50 EXR": 6.4, + 
"Fujifilm FinePix IS Pro": 23.0, + "Fujifilm FinePix IS-1": 8.0, + "Fujifilm FinePix J10": 5.75, + "Fujifilm FinePix J100": 6.16, + "Fujifilm FinePix J110w": 6.16, + "Fujifilm FinePix J12": 5.75, + "Fujifilm FinePix J120": 6.16, + "Fujifilm FinePix J15": 5.75, + "Fujifilm FinePix J150w": 6.16, + "Fujifilm FinePix J20": 6.16, + "Fujifilm FinePix J210": 6.16, + "Fujifilm FinePix J22": 6.16, + "Fujifilm FinePix J25": 6.16, + "Fujifilm FinePix J250": 6.16, + "Fujifilm FinePix J26": 6.16, + "Fujifilm FinePix J27": 6.16, + "Fujifilm FinePix J28": 6.16, + "Fujifilm FinePix J29": 6.16, + "Fujifilm FinePix J30": 6.16, + "Fujifilm FinePix J32": 6.16, + "Fujifilm FinePix J35": 6.16, + "Fujifilm FinePix J37": 6.16, + "Fujifilm FinePix J38": 6.16, + "Fujifilm FinePix J50": 5.75, + "Fujifilm FinePix JV100": 6.16, + "Fujifilm FinePix JV105": 6.16, + "Fujifilm FinePix JV110": 6.16, + "Fujifilm FinePix JV150": 6.16, + "Fujifilm FinePix JV200": 6.16, + "Fujifilm FinePix JV205": 6.16, + "Fujifilm FinePix JV250": 6.16, + "Fujifilm FinePix JV255": 6.16, + "Fujifilm FinePix JX200": 6.16, + "Fujifilm FinePix JX205": 6.16, + "Fujifilm FinePix JX210": 6.16, + "Fujifilm FinePix JX250": 6.16, + "Fujifilm FinePix JX280": 6.16, + "Fujifilm FinePix JX300": 6.16, + "Fujifilm FinePix JX305": 6.16, + "Fujifilm FinePix JX350": 6.16, + "Fujifilm FinePix JX355": 6.16, + "Fujifilm FinePix JX370": 6.16, + "Fujifilm FinePix JX375": 6.16, + "Fujifilm FinePix JX400": 6.16, + "Fujifilm FinePix JX405": 6.16, + "Fujifilm FinePix JX420": 6.16, + "Fujifilm FinePix JX500": 6.16, + "Fujifilm FinePix JX520": 6.16, + "Fujifilm FinePix JX530": 6.16, + "Fujifilm FinePix JX550": 6.16, + "Fujifilm FinePix JX580": 6.16, + "Fujifilm FinePix JX700": 6.16, + "Fujifilm FinePix JZ100": 6.16, + "Fujifilm FinePix JZ200": 6.16, + "Fujifilm FinePix JZ250": 6.16, + "Fujifilm FinePix JZ300": 6.16, + "Fujifilm FinePix JZ305": 6.16, + "Fujifilm FinePix JZ310": 6.16, + "Fujifilm FinePix JZ500": 6.16, + "Fujifilm FinePix JZ505": 6.16, + "Fujifilm FinePix JZ510": 6.16, + "Fujifilm FinePix JZ700": 6.16, + "Fujifilm FinePix M603": 7.53, + "Fujifilm FinePix PR21": 6.4, + "Fujifilm FinePix Real 3D W1": 6.16, + "Fujifilm FinePix Real 3D W3": 6.16, + "Fujifilm FinePix S1": 6.16, + "Fujifilm FinePix S1 Pro": 23.0, + "Fujifilm FinePix S1000fd": 6.16, + "Fujifilm FinePix S100fs": 8.8, + "Fujifilm FinePix S1500": 6.16, + "Fujifilm FinePix S1600": 6.16, + "Fujifilm FinePix S1700": 6.16, + "Fujifilm FinePix S1730": 6.16, + "Fujifilm FinePix S1770": 6.16, + "Fujifilm FinePix S1800": 6.16, + "Fujifilm FinePix S1850": 6.16, + "Fujifilm FinePix S1880": 6.16, + "Fujifilm FinePix S1900": 6.16, + "Fujifilm FinePix S2 Pro": 23.0, + "Fujifilm FinePix S20 Pro": 7.53, + "Fujifilm FinePix S2000hd": 6.16, + "Fujifilm FinePix S200EXR": 8.0, + "Fujifilm FinePix S205EXR": 8.0, + "Fujifilm FinePix S2500hd": 6.16, + "Fujifilm FinePix S2550hd": 6.16, + "Fujifilm FinePix S2600hd": 6.16, + "Fujifilm FinePix S2800hd": 6.16, + "Fujifilm FinePix S2900hd": 6.16, + "Fujifilm FinePix S2950": 6.16, + "Fujifilm FinePix S2980": 6.16, + "Fujifilm FinePix S2990": 6.16, + "Fujifilm FinePix S3 Pro": 23.5, + "Fujifilm FinePix S3000 Z": 5.33, + "Fujifilm FinePix S304": 5.33, + "Fujifilm FinePix S3200": 6.16, + "Fujifilm FinePix S3250": 6.16, + "Fujifilm FinePix S3300": 6.16, + "Fujifilm FinePix S3350": 6.16, + "Fujifilm FinePix S3400": 6.16, + "Fujifilm FinePix S3450": 6.16, + "Fujifilm FinePix S3500 Zoom": 5.33, + "Fujifilm FinePix S4000": 6.16, + "Fujifilm FinePix S4050": 6.16, + "Fujifilm FinePix 
S4200": 6.16, + "Fujifilm FinePix S4300": 6.16, + "Fujifilm FinePix S4400": 6.16, + "Fujifilm FinePix S4500": 6.16, + "Fujifilm FinePix S4600": 6.16, + "Fujifilm FinePix S4700": 6.16, + "Fujifilm FinePix S4800": 6.16, + "Fujifilm FinePix S5 Pro": 23.0, + "Fujifilm FinePix S5000 Zoom": 5.33, + "Fujifilm FinePix S5100 Zoom": 5.33, + "Fujifilm FinePix S5200 Zoom": 5.75, + "Fujifilm FinePix S5500 Zoom": 5.33, + "Fujifilm FinePix S5600 Zoom": 5.75, + "Fujifilm FinePix S5700 Zoom": 5.75, + "Fujifilm FinePix S5800": 5.75, + "Fujifilm FinePix S6000fd": 7.53, + "Fujifilm FinePix S602 Zoom": 7.53, + "Fujifilm FinePix S602Z Pro": 7.53, + "Fujifilm FinePix S6500fd": 7.53, + "Fujifilm FinePix S6600": 6.16, + "Fujifilm FinePix S6700": 6.16, + "Fujifilm FinePix S6800": 6.16, + "Fujifilm FinePix S7000 Zoom": 7.53, + "Fujifilm FinePix S8000fd": 6.03, + "Fujifilm FinePix S8100fd": 6.16, + "Fujifilm FinePix S8200": 6.16, + "Fujifilm FinePix S8300": 6.16, + "Fujifilm FinePix S8400": 6.16, + "Fujifilm FinePix S8500": 6.16, + "Fujifilm FinePix S8600": 6.16, + "Fujifilm FinePix S9000 Zoom": 8.0, + "Fujifilm FinePix S9100": 8.0, + "Fujifilm FinePix S9200": 6.16, + "Fujifilm FinePix S9400W": 6.16, + "Fujifilm FinePix S9500": 8.0, + "Fujifilm FinePix S9600": 8.0, + "Fujifilm FinePix SL1000": 6.16, + "Fujifilm FinePix SL240": 6.16, + "Fujifilm FinePix SL260": 6.16, + "Fujifilm FinePix SL280": 6.16, + "Fujifilm FinePix SL300": 6.16, + "Fujifilm FinePix T200": 6.16, + "Fujifilm FinePix T205": 6.16, + "Fujifilm FinePix T300": 6.16, + "Fujifilm FinePix T305": 6.16, + "Fujifilm FinePix T350": 6.16, + "Fujifilm FinePix T400": 6.16, + "Fujifilm FinePix T500": 6.16, + "Fujifilm FinePix T550": 6.16, + "Fujifilm FinePix V10 Zoom": 5.75, + "Fujifilm FinePix X100": 23.6, + "Fujifilm FinePix XP10": 6.16, + "Fujifilm FinePix XP100": 6.16, + "Fujifilm FinePix XP11": 6.16, + "Fujifilm FinePix XP150": 6.16, + "Fujifilm FinePix XP170": 6.16, + "Fujifilm FinePix XP20": 6.16, + "Fujifilm FinePix XP200": 6.16, + "Fujifilm FinePix XP22": 6.16, + "Fujifilm FinePix XP30": 6.16, + "Fujifilm FinePix XP33": 6.16, + "Fujifilm FinePix XP50": 6.16, + "Fujifilm FinePix XP60": 6.16, + "Fujifilm FinePix XP70": 6.16, + "Fujifilm FinePix Z1": 5.75, + "Fujifilm FinePix Z1000EXR": 6.4, + "Fujifilm FinePix Z100fd": 5.75, + "Fujifilm FinePix Z10fd": 5.75, + "Fujifilm FinePix Z110": 6.16, + "Fujifilm FinePix Z2": 5.75, + "Fujifilm FinePix Z200fd": 6.16, + "Fujifilm FinePix Z20fd": 6.16, + "Fujifilm FinePix Z3": 5.75, + "Fujifilm FinePix Z30": 6.16, + "Fujifilm FinePix Z300": 6.16, + "Fujifilm FinePix Z31": 6.16, + "Fujifilm FinePix Z33WP": 6.16, + "Fujifilm FinePix Z35": 6.16, + "Fujifilm FinePix Z37": 6.16, + "Fujifilm FinePix Z5fd": 5.75, + "Fujifilm FinePix Z70": 6.16, + "Fujifilm FinePix Z700EXR": 6.4, + "Fujifilm FinePix Z707EXR": 6.4, + "Fujifilm FinePix Z71": 6.16, + "Fujifilm FinePix Z80": 6.16, + "Fujifilm FinePix Z800EXR": 6.4, + "Fujifilm FinePix Z808EXR": 6.4, + "Fujifilm FinePix Z81": 6.16, + "Fujifilm FinePix Z90": 6.16, + "Fujifilm FinePix Z900EXR": 6.4, + "Fujifilm FinePix Z909EXR": 6.4, + "Fujifilm FinePix Z91": 6.16, + "Fujifilm FinePix Z950EXR": 6.4, + "Fujifilm Finepix 30i": 5.33, + "Fujifilm MX-1200": 6.4, + "Fujifilm MX-1400": 5.33, + "Fujifilm MX-1500": 6.4, + "Fujifilm MX-1700": 6.4, + "Fujifilm MX-2700": 6.4, + "Fujifilm MX-2900 Zoom": 6.4, + "Fujifilm MX-500": 6.4, + "Fujifilm MX-600 Zoom": 6.4, + "Fujifilm MX-700": 6.4, + "Fujifilm X-A1": 23.6, + "Fujifilm X-E1": 23.6, + "Fujifilm X-E2": 23.6, + "Fujifilm X-M1": 23.6, + 
"Fujifilm X-Pro1": 23.6, + "Fujifilm X-S1": 8.8, + "Fujifilm X-T1": 23.6, + "Fujifilm X10": 8.8, + "Fujifilm X100S": 23.6, + "Fujifilm X100T": 23.6, + "Fujifilm X20": 8.8, + "Fujifilm X30": 8.8, + "Fujifilm XF1": 8.8, + "Fujifilm XQ1": 8.8, + "GE A1030": 7.53, + "GE A1035": 6.08, + "GE A1050": 6.08, + "GE A1235": 6.16, + "GE A1250": 6.16, + "GE A1255": 6.08, + "GE A1455": 6.16, + "GE A1456W": 6.16, + "GE A730": 5.75, + "GE A735": 5.75, + "GE A830": 5.75, + "GE A835": 5.75, + "GE A950": 6.08, + "GE C1033": 6.08, + "GE C1233": 6.08, + "GE C1433": 6.16, + "GE C1440W": 6.16, + "GE Create": 6.16, + "GE E1030": 7.53, + "GE E1035": 7.53, + "GE E1040": 7.53, + "GE E1050": 6.08, + "GE E1050 TW": 6.08, + "GE E1055 W": 6.08, + "GE E1235": 7.44, + "GE E1240": 7.53, + "GE E1250TW": 6.16, + "GE E1255W": 6.16, + "GE E1276W": 6.16, + "GE E1410SW": 6.16, + "GE E1450W": 6.16, + "GE E1480W": 6.16, + "GE E1486TW": 6.16, + "GE E1680W": 6.08, + "GE E840S": 5.75, + "GE E850": 5.75, + "GE G 1": 5.75, + "GE G100": 6.16, + "GE G2": 6.03, + "GE G3": 6.08, + "GE G3WP": 6.16, + "GE G5WP": 6.16, + "GE J1050": 6.08, + "GE J1250": 6.16, + "GE J1455": 6.16, + "GE J1456W": 6.16, + "GE J1458W": 6.16, + "GE J1470S": 6.08, + "GE PJ1": 6.16, + "GE X1": 5.75, + "GE X3": 6.08, + "GE X500": 6.16, + "GE X550": 6.16, + "GE X600": 6.16, + "Hasselblad L2D-20c": 17.5, + "HP CA350": 6.16, + "HP CB350": 6.16, + "HP CW450": 6.16, + "HP CW450t": 6.16, + "HP PW460t": 6.16, + "HP PW550": 6.16, + "HP Photosmart 120": 6.4, + "HP Photosmart 318": 5.33, + "HP Photosmart 320": 5.33, + "HP Photosmart 435": 5.33, + "HP Photosmart 612": 5.33, + "HP Photosmart 620": 4.8, + "HP Photosmart 635": 4.5, + "HP Photosmart 715": 7.11, + "HP Photosmart 720": 7.11, + "HP Photosmart 733": 5.33, + "HP Photosmart 735": 5.33, + "HP Photosmart 812": 7.11, + "HP Photosmart 850": 7.11, + "HP Photosmart 935": 7.11, + "HP Photosmart 945": 7.11, + "HP Photosmart C20": 8.8, + "HP Photosmart C200": 8.8, + "HP Photosmart C215": 5.33, + "HP Photosmart C30": 8.8, + "HP Photosmart C315": 5.33, + "HP Photosmart C500": 8.8, + "HP Photosmart C618": 5.33, + "HP Photosmart C912": 8.8, + "HP Photosmart E317": 5.75, + "HP Photosmart E327": 5.75, + "HP Photosmart E337": 5.75, + "HP Photosmart E427": 5.75, + "HP Photosmart M22": 5.75, + "HP Photosmart M23": 5.75, + "HP Photosmart M307": 5.33, + "HP Photosmart M407": 5.75, + "HP Photosmart M417": 5.75, + "HP Photosmart M425": 5.75, + "HP Photosmart M437": 5.75, + "HP Photosmart M447": 5.75, + "HP Photosmart M517": 5.75, + "HP Photosmart M525": 5.75, + "HP Photosmart M527": 5.75, + "HP Photosmart M537": 5.75, + "HP Photosmart M547": 5.75, + "HP Photosmart M627": 5.75, + "HP Photosmart M637": 5.75, + "HP Photosmart M737": 5.75, + "HP Photosmart Mz67": 7.11, + "HP Photosmart R507": 5.75, + "HP Photosmart R607": 5.75, + "HP Photosmart R707": 7.11, + "HP Photosmart R717": 7.11, + "HP Photosmart R725": 5.75, + "HP Photosmart R727": 5.75, + "HP Photosmart R742": 5.75, + "HP Photosmart R817": 5.75, + "HP Photosmart R818": 5.75, + "HP Photosmart R827": 5.75, + "HP Photosmart R837": 5.75, + "HP Photosmart R847": 5.75, + "HP Photosmart R927": 7.11, + "HP Photosmart R937": 5.75, + "HP Photosmart R967": 7.11, + "HP R607 BMW": 5.75, + "HP R607 Harajuku": 5.75, + "HP SB360": 6.16, + "HP SW450": 6.16, + "HTC One X": 4.8, + "HTC One": 4.54, + "JVC GC-QX3HD": 7.11, + "JVC GC-QX5HD": 7.11, + "Jenoptik JD 1300 D": 6.4, + "Jenoptik JD 1300 F": 6.4, + "Jenoptik JD 1500 z3": 6.4, + "Jenoptik JD 2.1 FF": 4.5, + "Jenoptik JD 2.1 xz3": 4.5, + "Jenoptik JD 2100 
AF": 5.33, + "Jenoptik JD 2100 F": 5.33, + "Jenoptik JD 2100 M": 5.33, + "Jenoptik JD 2100 z3 S": 5.33, + "Jenoptik JD 2300 z3": 7.53, + "Jenoptik JD 3.1 exclusiv": 6.4, + "Jenoptik JD 3.1 z3 MPEG 4": 5.33, + "Jenoptik JD 3.3 AF": 5.33, + "Jenoptik JD 3.3 xz3": 5.33, + "Jenoptik JD 3.3x4 ie": 5.33, + "Jenoptik JD 3.3z10": 5.33, + "Jenoptik JD 3300 z3": 7.11, + "Jenoptik JD 3300 z3 S": 7.11, + "Jenoptik JD 4.0 LCD": 7.11, + "Jenoptik JD 4.1 xz3": 5.75, + "Jenoptik JD 4.1 z3 MPEG4": 5.75, + "Jenoptik JD 4.1 z8": 5.75, + "Jenoptik JD 4.1 zoom": 5.75, + "Jenoptik JD 4100 z3": 7.53, + "Jenoptik JD 4100 z3 S": 7.11, + "Jenoptik JD 4100 zoom": 7.11, + "Jenoptik JD 4360z": 7.11, + "Jenoptik JD 4363z": 7.11, + "Jenoptik JD 5.0z3 EasyShot": 5.75, + "Jenoptik JD 5.2 z3": 7.11, + "Jenoptik JD 5.2 z3 MPEG4": 7.11, + "Jenoptik JD 5.2 zoom": 5.75, + "Jenoptik JD 5200 z3": 7.11, + "Jenoptik JD 6.0 z3": 7.11, + "Jenoptik JD 6.0 z3 MPEG4": 7.11, + "Jenoptik JD 6.0 z3 exclusiv": 7.11, + "Jenoptik JD 8.0 exclusiv": 7.11, + "Jenoptik JD 8.0z3 EasyShot": 7.11, + "Jenoptik JD C 1.3 LCD": 6.4, + "Jenoptik JD C 1.3 SD": 6.4, + "Jenoptik JD C 1300": 6.4, + "Jenoptik JD C 2.1 LCD": 6.4, + "Jenoptik JD C 3.0 S": 6.4, + "Jenoptik JD C 3.1 LCD": 6.4, + "Jenoptik JD C 3.1 LI": 6.4, + "Jenoptik JD C 3.1 SL": 6.4, + "Jenoptik JD C 3.1 z3": 6.4, + "Jenoptik JD C 5.0 SL": 7.11, + "Kodak DC200": 7.27, + "Kodak DC200 plus": 7.27, + "Kodak DC210 plus": 7.27, + "Kodak DC215": 7.27, + "Kodak DC220": 7.27, + "Kodak DC240": 7.27, + "Kodak DC260": 7.27, + "Kodak DC265": 7.27, + "Kodak DC280": 7.53, + "Kodak DC290": 7.27, + "Kodak DC3200": 7.53, + "Kodak DC3400": 7.53, + "Kodak DC3800": 7.53, + "Kodak DC4800": 7.27, + "Kodak DC5000": 7.27, + "Kodak DCS Pro 14n": 36.0, + "Kodak DCS Pro SLR/c": 36.0, + "Kodak DCS Pro SLR/n": 36.0, + "Kodak DCS315": 27.65, + "Kodak DCS330": 18.1, + "Kodak DCS420": 14.0, + "Kodak DCS460": 27.65, + "Kodak DCS520": 27.65, + "Kodak DCS560": 27.65, + "Kodak DCS620": 27.65, + "Kodak DCS620x": 22.8, + "Kodak DCS660": 27.65, + "Kodak DCS720x": 22.8, + "Kodak DCS760": 27.65, + "Kodak DX3215": 5.33, + "Kodak DX3500": 6.4, + "Kodak DX3600": 6.4, + "Kodak DX3700": 7.11, + "Kodak DX3900": 7.11, + "Kodak DX4330": 5.75, + "Kodak DX4530": 5.75, + "Kodak DX4900": 5.75, + "Kodak DX6340": 5.33, + "Kodak DX6440": 5.75, + "Kodak DX6490": 5.75, + "Kodak DX7440": 5.75, + "Kodak DX7590": 5.75, + "Kodak DX7630": 7.11, + "Kodak EasyShare C1013": 6.16, + "Kodak EasyShare C135": 6.16, + "Kodak EasyShare C140": 5.75, + "Kodak EasyShare C142": 5.75, + "Kodak EasyShare C143": 6.16, + "Kodak EasyShare C1505": 6.16, + "Kodak EasyShare C1530": 6.16, + "Kodak EasyShare C1550": 6.16, + "Kodak EasyShare C160": 5.75, + "Kodak EasyShare C180": 6.16, + "Kodak EasyShare C182": 6.16, + "Kodak EasyShare C183": 6.16, + "Kodak EasyShare C190": 6.16, + "Kodak EasyShare C195": 6.16, + "Kodak EasyShare C300": 7.11, + "Kodak EasyShare C310": 7.11, + "Kodak EasyShare C330": 7.11, + "Kodak EasyShare C340": 7.11, + "Kodak EasyShare C360": 7.11, + "Kodak EasyShare C433": 5.75, + "Kodak EasyShare C503": 5.75, + "Kodak EasyShare C513": 5.75, + "Kodak EasyShare C530": 5.75, + "Kodak EasyShare C533": 5.75, + "Kodak EasyShare C610": 5.75, + "Kodak EasyShare C613": 5.75, + "Kodak EasyShare C623": 5.75, + "Kodak EasyShare C643": 5.75, + "Kodak EasyShare C653": 5.75, + "Kodak EasyShare C663": 5.75, + "Kodak EasyShare C703": 5.75, + "Kodak EasyShare C713": 5.75, + "Kodak EasyShare C743": 5.75, + "Kodak EasyShare C763": 5.75, + "Kodak EasyShare C813": 5.75, + 
"Kodak EasyShare C875": 7.11, + "Kodak EasyShare C913": 5.75, + "Kodak EasyShare CD1013": 6.16, + "Kodak EasyShare CD703": 5.75, + "Kodak EasyShare CD80": 6.16, + "Kodak EasyShare CD82": 6.08, + "Kodak EasyShare CD90": 6.08, + "Kodak EasyShare CD93": 5.75, + "Kodak EasyShare CX4200": 5.33, + "Kodak EasyShare CX4230": 5.33, + "Kodak EasyShare CX4300": 5.33, + "Kodak EasyShare CX6200": 5.75, + "Kodak EasyShare CX6230": 5.75, + "Kodak EasyShare CX6330": 5.75, + "Kodak EasyShare CX6445": 5.75, + "Kodak EasyShare CX7220": 5.75, + "Kodak EasyShare CX7300": 5.33, + "Kodak EasyShare CX7330": 5.75, + "Kodak EasyShare CX7430": 5.75, + "Kodak EasyShare CX7525": 5.75, + "Kodak EasyShare CX7530": 5.75, + "Kodak EasyShare LS745": 7.11, + "Kodak EasyShare M1033": 6.16, + "Kodak EasyShare M1063": 5.75, + "Kodak EasyShare M1073 IS": 5.75, + "Kodak EasyShare M1093 IS": 6.16, + "Kodak EasyShare M215": 4.8, + "Kodak EasyShare M320": 5.75, + "Kodak EasyShare M340": 6.16, + "Kodak EasyShare M341": 6.16, + "Kodak EasyShare M380": 6.16, + "Kodak EasyShare M381": 6.16, + "Kodak EasyShare M420": 6.16, + "Kodak EasyShare M522": 6.16, + "Kodak EasyShare M530": 6.16, + "Kodak EasyShare M531": 6.16, + "Kodak EasyShare M532": 6.16, + "Kodak EasyShare M5370": 6.16, + "Kodak EasyShare M550": 6.16, + "Kodak EasyShare M552": 6.16, + "Kodak EasyShare M565": 6.16, + "Kodak EasyShare M575": 6.16, + "Kodak EasyShare M580": 6.16, + "Kodak EasyShare M583": 6.08, + "Kodak EasyShare M750": 6.16, + "Kodak EasyShare M753": 5.75, + "Kodak EasyShare M763": 5.75, + "Kodak EasyShare M853": 5.75, + "Kodak EasyShare M863": 5.75, + "Kodak EasyShare M873": 5.75, + "Kodak EasyShare M883": 5.75, + "Kodak EasyShare M893 IS": 5.75, + "Kodak EasyShare MD1063": 6.08, + "Kodak EasyShare MD30": 6.08, + "Kodak EasyShare MD41": 6.08, + "Kodak EasyShare MD81": 6.08, + "Kodak EasyShare MD853": 5.75, + "Kodak EasyShare MD863": 5.75, + "Kodak EasyShare MX1063": 6.08, + "Kodak EasyShare Max Z990": 6.08, + "Kodak EasyShare Mini": 4.8, + "Kodak EasyShare P712": 5.75, + "Kodak EasyShare P850": 5.75, + "Kodak EasyShare P880": 7.11, + "Kodak EasyShare Sport": 6.16, + "Kodak EasyShare Touch M577": 6.16, + "Kodak EasyShare V1003": 7.11, + "Kodak EasyShare V1073": 7.85, + "Kodak EasyShare V1233": 7.44, + "Kodak EasyShare V1253": 7.44, + "Kodak EasyShare V1273": 7.44, + "Kodak EasyShare V530": 5.75, + "Kodak EasyShare V550": 5.75, + "Kodak EasyShare V570": 5.75, + "Kodak EasyShare V603": 5.75, + "Kodak EasyShare V610": 5.75, + "Kodak EasyShare V705": 5.75, + "Kodak EasyShare V803": 7.11, + "Kodak EasyShare Z1012 IS": 6.08, + "Kodak EasyShare Z1015 IS": 6.08, + "Kodak EasyShare Z1085 IS": 7.85, + "Kodak EasyShare Z1275": 7.44, + "Kodak EasyShare Z1285": 7.44, + "Kodak EasyShare Z1485 IS": 7.44, + "Kodak EasyShare Z5010": 6.16, + "Kodak EasyShare Z5120": 6.08, + "Kodak EasyShare Z612": 5.75, + "Kodak EasyShare Z650": 5.75, + "Kodak EasyShare Z700": 5.75, + "Kodak EasyShare Z710": 5.75, + "Kodak EasyShare Z712 IS": 5.75, + "Kodak EasyShare Z730": 5.75, + "Kodak EasyShare Z740": 5.75, + "Kodak EasyShare Z7590": 5.75, + "Kodak EasyShare Z760": 7.11, + "Kodak EasyShare Z812 IS": 5.75, + "Kodak EasyShare Z8612 IS": 5.75, + "Kodak EasyShare Z885": 7.11, + "Kodak EasyShare Z915": 6.16, + "Kodak EasyShare Z950": 6.08, + "Kodak EasyShare Z980": 6.08, + "Kodak EasyShare Z981": 6.08, + "Kodak EasyShare Z990": 6.08, + "Kodak EasyShare ZD15": 6.16, + "Kodak EasyShare ZD710": 5.75, + "Kodak EasyShare ZD8612 IS": 5.75, + "Kodak EasyShare-One 6MP": 5.75, + "Kodak Easyshare One": 
5.75, + "Kodak LS420": 7.11, + "Kodak LS443": 5.33, + "Kodak LS633": 5.75, + "Kodak LS743": 7.11, + "Kodak LS753": 7.11, + "Kodak LS755": 5.75, + "Kodak M590": 4.8, + "Kodak PixPro AZ251": 6.16, + "Kodak PixPro AZ361": 6.16, + "Kodak PixPro AZ362": 6.16, + "Kodak PixPro AZ501": 6.16, + "Kodak PixPro AZ521": 6.16, + "Kodak PixPro AZ522": 6.16, + "Kodak PixPro AZ651": 6.16, + "Kodak PixPro FZ151": 6.16, + "Kodak PixPro FZ201": 6.16, + "Kodak PixPro FZ41": 6.16, + "Kodak PixPro FZ51": 6.16, + "Kodak S-1": 17.3, + "Kodak Slice": 6.16, + "Kodak mc3": 6.4, + "Konica DG-2": 5.33, + "Konica DG-3Z": 5.33, + "Konica Q-M100": 4.8, + "Konica Q-M200": 6.4, + "Konica Revio C2": 4.23, + "Konica Revio KD-200Z": 5.33, + "Konica Revio KD-210Z": 7.11, + "Konica Revio KD-220Z": 4.5, + "Konica Revio KD-25": 7.11, + "Konica Revio KD-300Z": 7.11, + "Konica Revio KD-310Z": 7.11, + "Konica Revio KD-3300Z": 5.75, + "Konica Revio KD-4000Z": 7.11, + "Konica Revio KD-400Z": 7.11, + "Konica Revio KD-410Z": 7.11, + "Konica Revio KD-420Z": 5.75, + "Konica Revio KD-500Z": 7.11, + "Konica Revio KD-510Z": 7.11, + "Konica-Minolta DG-5W": 5.75, + "Konica-Minolta DiMAGE A2": 8.8, + "Konica-Minolta DiMAGE A200": 8.8, + "Konica-Minolta DiMAGE E40": 6.4, + "Konica-Minolta DiMAGE E50": 5.75, + "Konica-Minolta DiMAGE E500": 5.75, + "Konica-Minolta DiMAGE G530": 5.75, + "Konica-Minolta DiMAGE G600": 7.27, + "Konica-Minolta DiMAGE X1": 7.11, + "Konica-Minolta DiMAGE X31": 4.5, + "Konica-Minolta DiMAGE X50": 5.75, + "Konica-Minolta DiMAGE X60": 5.75, + "Konica-Minolta DiMAGE Xg": 5.33, + "Konica-Minolta DiMAGE Z10": 5.75, + "Konica-Minolta DiMAGE Z2": 5.75, + "Konica-Minolta DiMAGE Z20": 5.75, + "Konica-Minolta DiMAGE Z3": 5.75, + "Konica-Minolta DiMAGE Z5": 5.75, + "Konica-Minolta DiMAGE Z6": 5.75, + "Konica-Minolta Dynax 5D": 23.5, + "Konica-Minolta Dynax 7D": 23.5, + "Konica-Minolta e-mini": 8.8, + "Konica-Minolta e-mini D": 8.8, + "Konica-Minolta e-mini M": 8.8, + "Kyocera Finecam 3300": 7.11, + "Kyocera Finecam L3": 5.33, + "Kyocera Finecam L30": 5.33, + "Kyocera Finecam L3v": 5.33, + "Kyocera Finecam L4": 5.75, + "Kyocera Finecam L4v": 7.11, + "Kyocera Finecam M400R": 5.33, + "Kyocera Finecam M410R": 5.33, + "Kyocera Finecam S3": 7.11, + "Kyocera Finecam S3L": 7.11, + "Kyocera Finecam S3R": 7.11, + "Kyocera Finecam S3X": 7.11, + "Kyocera Finecam S4": 7.11, + "Kyocera Finecam S5": 7.11, + "Kyocera Finecam S5R": 7.11, + "Kyocera Finecam SL300R": 5.33, + "Kyocera Finecam SL400R": 5.33, + "LG G3": 4.69, + "LGE Nexus 5": 4.54, + "Leica X1": 23.6, + "Leica C (Typ112)": 7.53, + "Leica C-LUX 1": 5.75, + "Leica C-LUX 2": 5.75, + "Leica C-LUX 3": 6.08, + "Leica D-LUX": 5.75, + "Leica D-LUX 2": 7.76, + "Leica D-LUX 3": 7.76, + "Leica D-LUX 4": 7.85, + "Leica D-LUX 5": 7.85, + "Leica D-Lux 6": 7.53, + "Leica Digilux": 6.4, + "Leica Digilux 1": 7.53, + "Leica Digilux 2": 8.8, + "Leica Digilux 3": 17.3, + "Leica Digilux 4.3": 7.53, + "Leica Digilux Zoom": 6.4, + "Leica M Typ 240": 36.0, + "Leica M-E Typ 220": 35.8, + "Leica M-Monochrom": 35.8, + "Leica M-P": 36.0, + "Leica M8": 27.0, + "Leica M8.2": 27.0, + "Leica M9": 35.8, + "Leica M9 Titanium": 35.8, + "Leica M9-P": 35.8, + "Leica S (Type 007)": 45.0, + "Leica S-E": 45.0, + "Leica S2": 45.0, + "Leica T (Typ 701)": 23.6, + "Leica V-LUX 1": 7.11, + "Leica V-LUX 2": 6.08, + "Leica V-LUX 20": 6.08, + "Leica V-LUX 3": 6.08, + "Leica V-LUX 30": 6.08, + "Leica V-Lux 4": 6.16, + "Leica V-Lux 40": 6.08, + "Leica X (Typ 113)": 23.6, + "Leica X Vario": 23.6, + "Leica X-E": 23.6, + "Leica X2": 23.6, + 
"Minolta DiMAGE 2300": 7.53, + "Minolta DiMAGE 2330": 7.53, + "Minolta DiMAGE 5": 7.11, + "Minolta DiMAGE 7": 8.8, + "Minolta DiMAGE 7Hi": 8.8, + "Minolta DiMAGE 7i": 8.8, + "Minolta DiMAGE A1": 8.8, + "Minolta DiMAGE E201": 7.53, + "Minolta DiMAGE E203": 5.33, + "Minolta DiMAGE E223": 5.33, + "Minolta DiMAGE E323": 5.33, + "Minolta DiMAGE EX 1500 Wide": 6.4, + "Minolta DiMAGE EX 1500 Zoom": 6.4, + "Minolta DiMAGE F100": 7.11, + "Minolta DiMAGE F200": 7.11, + "Minolta DiMAGE F300": 7.11, + "Minolta DiMAGE G400": 5.75, + "Minolta DiMAGE G500": 7.11, + "Minolta DiMAGE S304": 7.11, + "Minolta DiMAGE S404": 7.11, + "Minolta DiMAGE S414": 7.11, + "Minolta DiMAGE X": 5.33, + "Minolta DiMAGE X20": 4.5, + "Minolta DiMAGE Xi": 5.33, + "Minolta DiMAGE Xt": 5.33, + "Minolta DiMAGE Z1": 5.33, + "Minolta RD-3000": 6.4, + "Minox Classic Leica M3 2.1": 6.4, + "Minox Classic Leica M3 3MP": 6.4, + "Minox Classic Leica M3 4MP": 6.4, + "Minox Classic Leica M3 5MP": 6.4, + "Minox DC 1011": 7.53, + "Minox DC 1011 Carat": 7.53, + "Minox DC 1022": 7.53, + "Minox DC 1033": 5.75, + "Minox DC 1044": 6.08, + "Minox DC 1055": 6.08, + "Minox DC 1211": 6.08, + "Minox DC 1222": 6.16, + "Minox DC 1233": 6.08, + "Minox DC 1311": 5.33, + "Minox DC 1422": 6.08, + "Minox DC 2111": 5.33, + "Minox DC 2122": 5.33, + "Minox DC 2133": 4.5, + "Minox DC 3311": 7.11, + "Minox DC 4011": 7.11, + "Minox DC 4211": 5.75, + "Minox DC 5011": 5.75, + "Minox DC 5211": 7.11, + "Minox DC 5222": 5.75, + "Minox DC 6011": 5.75, + "Minox DC 6033 WP": 5.75, + "Minox DC 6211": 5.75, + "Minox DC 6311": 7.11, + "Minox DC 7011": 5.75, + "Minox DC 7022": 5.75, + "Minox DC 7411": 5.75, + "Minox DC 8011": 5.75, + "Minox DC 8022 WP": 5.75, + "Minox DC 8111": 7.11, + "Minox DC 8122": 7.11, + "Minox DC 9011 WP": 6.16, + "Minox DCC 14.0": 6.16, + "Minox DCC 5.0 White Edition": 6.16, + "Minox DCC 5.1": 6.16, + "Minox DCC Leica M3 5MP Gold": 6.16, + "Minox DCC Rolleiflex AF 5.0": 6.4, + "Minox DD1": 6.4, + "Minox DD1 Diamond": 6.4, + "Minox DD100": 6.4, + "Minox DD200": 6.4, + "Minox DM 1": 6.4, + "Minox Mobi DV": 6.4, + "Minox Rolleiflex MiniDigi": 6.4, + "Nikon 1 AW1": 13.2, + "Nikon 1 J1": 13.2, + "Nikon 1 J2": 13.2, + "Nikon 1 J3": 13.2, + "Nikon 1 J4": 13.2, + "Nikon 1 S1": 13.2, + "Nikon 1 S2": 13.2, + "Nikon 1 V1": 13.2, + "Nikon 1 V2": 13.2, + "Nikon 1 V3": 13.2, + "Nikon Coolpix 100": 4.8, + "Nikon Coolpix 2000": 5.33, + "Nikon Coolpix 2100": 4.5, + "Nikon Coolpix 2200": 4.5, + "Nikon Coolpix 2500": 5.33, + "Nikon Coolpix 300": 4.8, + "Nikon Coolpix 3100": 5.33, + "Nikon Coolpix 3200": 5.33, + "Nikon Coolpix 3500": 5.33, + "Nikon Coolpix 3700": 5.33, + "Nikon Coolpix 4100": 5.75, + "Nikon Coolpix 4200": 7.11, + "Nikon Coolpix 4300": 7.11, + "Nikon Coolpix 4500": 7.11, + "Nikon Coolpix 4600": 5.75, + "Nikon Coolpix 4800": 5.75, + "Nikon Coolpix 5000": 8.8, + "Nikon Coolpix 5200": 7.11, + "Nikon Coolpix 5400": 7.11, + "Nikon Coolpix 5600": 5.75, + "Nikon Coolpix 5700": 8.8, + "Nikon Coolpix 5900": 7.11, + "Nikon Coolpix 600": 5.33, + "Nikon Coolpix 700": 6.4, + "Nikon Coolpix 7600": 7.11, + "Nikon Coolpix 775": 5.33, + "Nikon Coolpix 7900": 7.11, + "Nikon Coolpix 800": 6.4, + "Nikon Coolpix 8400": 8.8, + "Nikon Coolpix 8700": 8.8, + "Nikon Coolpix 880": 7.11, + "Nikon Coolpix 8800": 8.8, + "Nikon Coolpix 885": 7.11, + "Nikon Coolpix 900": 5.33, + "Nikon Coolpix 900s": 5.33, + "Nikon Coolpix 910": 6.4, + "Nikon Coolpix 950": 6.4, + "Nikon Coolpix 990": 7.11, + "Nikon Coolpix 995": 7.11, + "Nikon Coolpix A": 23.6, + "Nikon Coolpix AW100": 6.16, + "Nikon 
Coolpix AW100s": 6.16, + "Nikon Coolpix AW110": 6.16, + "Nikon Coolpix AW120": 6.16, + "Nikon Coolpix L1": 5.75, + "Nikon Coolpix L10": 5.75, + "Nikon Coolpix L100": 6.08, + "Nikon Coolpix L101": 5.75, + "Nikon Coolpix L11": 5.75, + "Nikon Coolpix L110": 6.16, + "Nikon Coolpix L12": 5.75, + "Nikon Coolpix L120": 6.16, + "Nikon Coolpix L14": 5.75, + "Nikon Coolpix L15": 5.75, + "Nikon Coolpix L16": 5.75, + "Nikon Coolpix L18": 5.75, + "Nikon Coolpix L19": 5.75, + "Nikon Coolpix L20": 6.08, + "Nikon Coolpix L21": 6.08, + "Nikon Coolpix L22": 6.16, + "Nikon Coolpix L23": 4.96, + "Nikon Coolpix L24": 6.16, + "Nikon Coolpix L25": 4.8, + "Nikon Coolpix L26": 6.16, + "Nikon Coolpix L27": 6.16, + "Nikon Coolpix L28": 6.16, + "Nikon Coolpix L29": 6.16, + "Nikon Coolpix L30": 6.16, + "Nikon Coolpix L310": 6.16, + "Nikon Coolpix L320": 6.16, + "Nikon Coolpix L330": 6.16, + "Nikon Coolpix L5": 5.75, + "Nikon Coolpix L6": 5.75, + "Nikon Coolpix L610": 6.16, + "Nikon Coolpix L620": 6.16, + "Nikon Coolpix L810": 6.16, + "Nikon Coolpix L820": 6.16, + "Nikon Coolpix L830": 6.16, + "Nikon Coolpix P1": 7.11, + "Nikon Coolpix P100": 6.16, + "Nikon Coolpix P2": 7.11, + "Nikon Coolpix P3": 7.11, + "Nikon Coolpix P300": 6.16, + "Nikon Coolpix P310": 6.16, + "Nikon Coolpix P330": 7.53, + "Nikon Coolpix P340": 7.53, + "Nikon Coolpix P4": 7.11, + "Nikon Coolpix P50": 5.75, + "Nikon Coolpix P500": 6.16, + "Nikon Coolpix P5000": 7.11, + "Nikon Coolpix P510": 6.16, + "Nikon Coolpix P5100": 7.44, + "Nikon Coolpix P520": 6.16, + "Nikon Coolpix P530": 6.16, + "Nikon Coolpix P60": 5.75, + "Nikon Coolpix P600": 6.16, + "Nikon Coolpix P6000": 7.44, + "Nikon Coolpix P7000": 7.53, + "Nikon Coolpix P7100": 7.53, + "Nikon Coolpix P7700": 7.53, + "Nikon Coolpix P7800": 7.53, + "Nikon Coolpix P80": 6.08, + "Nikon Coolpix P90": 6.08, + "Nikon Coolpix S01": 4.96, + "Nikon Coolpix S02": 4.8, + "Nikon Coolpix S1": 5.75, + "Nikon Coolpix S10": 5.75, + "Nikon Coolpix S100": 6.16, + "Nikon Coolpix S1000pj": 6.16, + "Nikon Coolpix S1100pj": 6.16, + "Nikon Coolpix S1200pj": 6.16, + "Nikon Coolpix S2": 5.75, + "Nikon Coolpix S200": 5.75, + "Nikon Coolpix S210": 5.75, + "Nikon Coolpix S220": 6.08, + "Nikon Coolpix S225": 6.08, + "Nikon Coolpix S230": 6.08, + "Nikon Coolpix S2500": 6.16, + "Nikon Coolpix S2600": 6.16, + "Nikon Coolpix S2700": 6.16, + "Nikon Coolpix S2750": 6.16, + "Nikon Coolpix S2800 ": 6.16, + "Nikon Coolpix S3": 5.75, + "Nikon Coolpix S30": 4.8, + "Nikon Coolpix S3000": 6.16, + "Nikon Coolpix S31": 4.96, + "Nikon Coolpix S3100": 6.16, + "Nikon Coolpix S32": 4.8, + "Nikon Coolpix S3200": 6.16, + "Nikon Coolpix S3300": 6.16, + "Nikon Coolpix S3400": 6.16, + "Nikon Coolpix S3500": 6.16, + "Nikon Coolpix S3600": 6.16, + "Nikon Coolpix S4": 5.75, + "Nikon Coolpix S4000": 6.16, + "Nikon Coolpix S4100": 6.16, + "Nikon Coolpix S4150": 6.16, + "Nikon Coolpix S4200": 6.16, + "Nikon Coolpix S4300": 6.16, + "Nikon Coolpix S4400": 6.16, + "Nikon Coolpix S5": 5.75, + "Nikon Coolpix S50": 5.75, + "Nikon Coolpix S500": 5.75, + "Nikon Coolpix S50c": 5.75, + "Nikon Coolpix S51": 5.75, + "Nikon Coolpix S510": 5.75, + "Nikon Coolpix S5100": 6.16, + "Nikon Coolpix S51c": 5.75, + "Nikon Coolpix S52": 5.75, + "Nikon Coolpix S520": 5.75, + "Nikon Coolpix S5200": 6.16, + "Nikon Coolpix S52c": 5.75, + "Nikon Coolpix S5300": 6.16, + "Nikon Coolpix S550": 6.16, + "Nikon Coolpix S560": 6.08, + "Nikon Coolpix S570": 6.16, + "Nikon Coolpix S6": 5.75, + "Nikon Coolpix S60": 6.16, + "Nikon Coolpix S600": 6.08, + "Nikon Coolpix S6000": 6.16, + "Nikon 
Coolpix S610": 6.08, + "Nikon Coolpix S6100": 6.16, + "Nikon Coolpix S610c": 6.08, + "Nikon Coolpix S6150": 6.16, + "Nikon Coolpix S620": 6.08, + "Nikon Coolpix S6200": 6.16, + "Nikon Coolpix S630": 6.08, + "Nikon Coolpix S6300": 6.16, + "Nikon Coolpix S640": 6.08, + "Nikon Coolpix S6400": 6.16, + "Nikon Coolpix S6500": 6.16, + "Nikon Coolpix S6600 ": 6.16, + "Nikon Coolpix S6700": 6.16, + "Nikon Coolpix S6800": 6.16, + "Nikon Coolpix S6900": 6.16, + "Nikon Coolpix S70": 6.16, + "Nikon Coolpix S700": 7.44, + "Nikon Coolpix S710": 7.44, + "Nikon Coolpix S7c": 5.75, + "Nikon Coolpix S80": 6.16, + "Nikon Coolpix S8000": 6.16, + "Nikon Coolpix S800c": 6.16, + "Nikon Coolpix S8100": 6.16, + "Nikon Coolpix S810c": 6.16, + "Nikon Coolpix S8200": 6.16, + "Nikon Coolpix S9": 5.75, + "Nikon Coolpix S9050": 6.16, + "Nikon Coolpix S9100": 6.16, + "Nikon Coolpix S9200": 6.16, + "Nikon Coolpix S9300": 6.16, + "Nikon Coolpix S9400": 6.16, + "Nikon Coolpix S9500": 6.16, + "Nikon Coolpix S9600": 6.16, + "Nikon Coolpix S9700": 6.16, + "Nikon Coolpix SQ": 5.33, + "Nikon D1": 23.7, + "Nikon D100": 23.7, + "Nikon D1H": 23.7, + "Nikon D1X": 23.7, + "Nikon D200": 23.6, + "Nikon D2H": 23.7, + "Nikon D2Hs": 23.2, + "Nikon D2X": 23.7, + "Nikon D2xs": 23.7, + "Nikon D3": 36.0, + "Nikon D300": 23.6, + "Nikon D3000": 23.6, + "Nikon D300s": 23.6, + "Nikon D3100": 23.1, + "Nikon D3200": 23.2, + "Nikon D3300": 23.5, + "Nikon D3X": 35.9, + "Nikon D3s": 36.0, + "Nikon D4": 36.0, + "Nikon D40": 23.7, + "Nikon D40x": 23.6, + "Nikon D4s": 36.0, + "Nikon D50": 23.7, + "Nikon D5000": 23.6, + "Nikon D5100": 23.6, + "Nikon D5200": 23.5, + "Nikon D5300": 23.5, + "Nikon D60": 23.6, + "Nikon D600": 35.9, + "Nikon D610": 35.9, + "Nikon D70": 23.7, + "Nikon D700": 36.0, + "Nikon D7000": 23.6, + "Nikon D70s": 23.7, + "Nikon D7100": 23.5, + "Nikon D750": 35.9, + "Nikon D80": 23.6, + "Nikon D800": 35.9, + "Nikon D800E": 35.9, + "Nikon D810": 35.9, + "Nikon D90": 23.6, + "Nikon Df": 36.0, + "Nikon E2Ns": 8.8, + "Nikon E2n": 8.8, + "Nikon E2s": 8.8, + "Nikon E3": 8.8, + "Nikon E3s": 8.8, + "Nokia 808 PureView": 10.82, + "Nokia Lumia 1020": 8.64, + "Nokia Lumia 920": 4.8, + "Olympus ": 3.2, + "Olympus E-300 / EVOLT E-300": 17.3, + "Olympus AZ-1": 5.33, + "Olympus AZ-1 Ferrari 2004": 5.33, + "Olympus AZ-2 Zoom": 5.33, + "Olympus C-1": 4.5, + "Olympus C-1 Zoom": 4.5, + "Olympus C-100": 4.5, + "Olympus C-1000L": 6.4, + "Olympus C-120": 4.5, + "Olympus C-1400L": 8.8, + "Olympus C-1400XL": 8.8, + "Olympus C-150": 4.5, + "Olympus C-160": 5.33, + "Olympus C-170": 5.75, + "Olympus C-180": 5.75, + "Olympus C-2": 5.33, + "Olympus C-200 Zoom": 5.33, + "Olympus C-2000 Zoom": 6.4, + "Olympus C-2020 Zoom": 6.4, + "Olympus C-2040 Zoom": 6.4, + "Olympus C-21": 6.4, + "Olympus C-2100 UZ": 6.4, + "Olympus C-220 Zoom": 4.5, + "Olympus C-2500 L": 8.8, + "Olympus C-300 Zoom": 5.75, + "Olympus C-3000 Zoom": 7.11, + "Olympus C-3020 Zoom": 7.11, + "Olympus C-3030 Zoom": 7.11, + "Olympus C-3040 Zoom": 7.11, + "Olympus C-310 Zoom": 5.33, + "Olympus C-315 Zoom": 5.75, + "Olympus C-350 Zoom": 5.75, + "Olympus C-360 Zoom": 5.75, + "Olympus C-370 Zoom": 5.33, + "Olympus C-40 Zoom": 7.11, + "Olympus C-4000 Zoom": 7.11, + "Olympus C-4040 Zoom": 7.11, + "Olympus C-450 Zoom": 5.75, + "Olympus C-460 Zoom del Sol": 5.75, + "Olympus C-470 Zoom": 5.75, + "Olympus C-480 Zoom": 5.75, + "Olympus C-50 Zoom": 7.11, + "Olympus C-5000 Zoom": 7.27, + "Olympus C-5050 Zoom": 7.11, + "Olympus C-5060 Wide Zoom": 7.11, + "Olympus C-55 Zoom": 7.11, + "Olympus C-5500 Sport Zoom": 7.11, + 
"Olympus C-60 Zoom": 7.27, + "Olympus C-70 Zoom": 7.11, + "Olympus C-700 UZ": 5.33, + "Olympus C-7000 Zoom": 7.11, + "Olympus C-7070 Wide Zoom": 7.11, + "Olympus C-720 UZ": 5.75, + "Olympus C-730 UZ": 5.33, + "Olympus C-740 UZ": 5.75, + "Olympus C-750 UZ": 5.33, + "Olympus C-760 UZ": 5.33, + "Olympus C-765 UZ": 5.75, + "Olympus C-770 UZ": 5.75, + "Olympus C-8080 Wide Zoom": 8.8, + "Olympus C-820L": 4.8, + "Olympus C-840L": 5.33, + "Olympus C-860L": 5.33, + "Olympus C-900 Zoom": 5.33, + "Olympus C-920 Zoom": 5.33, + "Olympus C-960 Zoom": 5.33, + "Olympus C-990 Zoom": 5.33, + "Olympus D-150Z": 4.5, + "Olympus D-200L": 8.8, + "Olympus D-300L": 8.8, + "Olympus D-340L": 8.8, + "Olympus D-340R": 6.4, + "Olympus D-360L": 7.11, + "Olympus D-370": 4.5, + "Olympus D-380": 4.5, + "Olympus D-390": 4.5, + "Olympus D-395": 5.33, + "Olympus D-40 Zoom": 7.11, + "Olympus D-400 Zoom": 6.4, + "Olympus D-425": 5.75, + "Olympus D-435": 5.75, + "Olympus D-450 Zoom": 5.33, + "Olympus D-460 Zoom": 7.11, + "Olympus D-490 Zoom": 5.33, + "Olympus D-500L": 8.8, + "Olympus D-510 Zoom": 5.33, + "Olympus D-520 Zoom": 4.5, + "Olympus D-535 Zoom": 5.33, + "Olympus D-540 Zoom": 5.33, + "Olympus D-545 Zoom": 5.75, + "Olympus D-560 Zoom": 5.75, + "Olympus D-580 Zoom": 5.33, + "Olympus D-595 Zoom": 5.75, + "Olympus D-600L": 8.8, + "Olympus D-620L": 8.8, + "Olympus D-630 Zoom": 5.75, + "Olympus E-1": 17.3, + "Olympus E-10": 8.8, + "Olympus E-100 RS": 6.4, + "Olympus E-20": 8.8, + "Olympus E-3": 17.3, + "Olympus E-30": 17.3, + "Olympus E-400": 17.3, + "Olympus E-410 / EVOLT E-410": 17.3, + "Olympus E-420": 17.3, + "Olympus E-450": 17.3, + "Olympus E-5": 17.3, + "Olympus E-500 / EVOLT E-500": 17.3, + "Olympus E-510 / EVOLT E-510": 17.3, + "Olympus E-520": 17.3, + "Olympus E-600": 17.3, + "Olympus E-620": 17.3, + "Olympus FE-100": 5.75, + "Olympus FE-110": 5.75, + "Olympus FE-115": 5.75, + "Olympus FE-120": 5.75, + "Olympus FE-130": 5.75, + "Olympus FE-140": 5.75, + "Olympus FE-150": 5.75, + "Olympus FE-160": 5.75, + "Olympus FE-170": 5.75, + "Olympus FE-180": 5.75, + "Olympus FE-190": 5.75, + "Olympus FE-20": 6.03, + "Olympus FE-200": 5.75, + "Olympus FE-210": 5.75, + "Olympus FE-220": 5.75, + "Olympus FE-230": 5.75, + "Olympus FE-240": 5.75, + "Olympus FE-25": 6.08, + "Olympus FE-250": 7.11, + "Olympus FE-26": 6.08, + "Olympus FE-270": 5.75, + "Olympus FE-280": 6.16, + "Olympus FE-290": 5.75, + "Olympus FE-300": 7.44, + "Olympus FE-3000": 6.08, + "Olympus FE-3010": 6.08, + "Olympus FE-310": 5.75, + "Olympus FE-340": 6.03, + "Olympus FE-350": 5.75, + "Olympus FE-360": 6.03, + "Olympus FE-370": 6.03, + "Olympus FE-4000": 6.16, + "Olympus FE-4010": 6.16, + "Olympus FE-4020": 6.16, + "Olympus FE-4030": 6.08, + "Olympus FE-4040": 6.08, + "Olympus FE-4050": 6.16, + "Olympus FE-45": 6.08, + "Olympus FE-47": 6.08, + "Olympus FE-48": 6.16, + "Olympus FE-5000": 6.08, + "Olympus FE-5010": 6.08, + "Olympus FE-5020": 6.16, + "Olympus FE-5030": 6.08, + "Olympus FE-5035": 6.08, + "Olympus FE-5040": 6.16, + "Olympus FE-5050": 6.16, + "Olympus IR 500": 5.33, + "Olympus IR-300": 5.75, + "Olympus Mju 1060": 6.08, + "Olympus Mju 5000": 6.08, + "Olympus Mju 7050": 6.16, + "Olympus OM-D E-M1": 17.3, + "Olympus OM-D E-M10": 17.3, + "Olympus OM-D E-M5": 17.3, + "Olympus PEN E-P1": 17.3, + "Olympus PEN E-P2": 17.3, + "Olympus PEN E-P3": 17.3, + "Olympus PEN E-P5": 17.3, + "Olympus PEN E-PL1": 17.3, + "Olympus PEN E-PL1s": 17.3, + "Olympus PEN E-PL2": 17.3, + "Olympus PEN E-PL3": 17.3, + "Olympus PEN E-PL5": 17.3, + "Olympus PEN E-PL6": 17.3, + "Olympus 
PEN E-PL7": 17.3, + "Olympus PEN E-PM1": 17.3, + "Olympus PEN E-PM2": 17.3, + "Olympus SH-21": 6.16, + "Olympus SH-25MR": 6.16, + "Olympus SH-50 iHS": 6.16, + "Olympus SP 310": 7.11, + "Olympus SP 320": 7.11, + "Olympus SP 350": 7.11, + "Olympus SP 500 UZ": 5.75, + "Olympus SP 510 UZ": 5.75, + "Olympus SP 550 UZ": 5.75, + "Olympus SP 560 UZ": 6.16, + "Olympus SP 570 UZ": 6.08, + "Olympus SP 590 UZ": 6.08, + "Olympus SP 600 UZ": 6.08, + "Olympus SP 700": 5.75, + "Olympus SP 800 UZ": 6.08, + "Olympus SP 810 UZ": 6.16, + "Olympus SP-100": 6.16, + "Olympus SP-565UZ": 6.08, + "Olympus SP-610UZ": 6.16, + "Olympus SP-620 UZ": 6.16, + "Olympus SP-720UZ": 6.16, + "Olympus SZ-10": 6.16, + "Olympus SZ-11": 6.16, + "Olympus SZ-12": 6.16, + "Olympus SZ-14": 6.16, + "Olympus SZ-15": 6.16, + "Olympus SZ-16": 6.16, + "Olympus SZ-20": 6.16, + "Olympus SZ-30MR": 6.16, + "Olympus SZ-31MR iHS": 6.16, + "Olympus Stylus 1": 7.53, + "Olympus Stylus 1000": 7.11, + "Olympus Stylus 1010": 6.08, + "Olympus Stylus 1020": 6.08, + "Olympus Stylus 1030 SW": 6.08, + "Olympus Stylus 1040": 6.08, + "Olympus Stylus 1050 SW": 6.08, + "Olympus Stylus 1200": 7.44, + "Olympus Stylus 300": 5.75, + "Olympus Stylus 400": 5.75, + "Olympus Stylus 410": 5.75, + "Olympus Stylus 500": 5.75, + "Olympus Stylus 5010": 6.08, + "Olympus Stylus 550WP": 6.08, + "Olympus Stylus 600": 5.75, + "Olympus Stylus 700": 6.16, + "Olympus Stylus 7000": 6.08, + "Olympus Stylus 7010": 6.08, + "Olympus Stylus 7030": 6.08, + "Olympus Stylus 7040": 6.08, + "Olympus Stylus 720 SW": 6.16, + "Olympus Stylus 725 SW": 6.16, + "Olympus Stylus 730": 6.16, + "Olympus Stylus 740": 6.16, + "Olympus Stylus 750": 6.16, + "Olympus Stylus 760": 6.16, + "Olympus Stylus 770 SW": 6.16, + "Olympus Stylus 780": 6.16, + "Olympus Stylus 790 SW": 6.16, + "Olympus Stylus 800": 7.11, + "Olympus Stylus 810": 7.11, + "Olympus Stylus 820": 6.16, + "Olympus Stylus 830": 6.16, + "Olympus Stylus 840": 6.03, + "Olympus Stylus 850 SW": 6.03, + "Olympus Stylus 9000": 6.08, + "Olympus Stylus 9010": 6.08, + "Olympus Stylus SH-1": 6.16, + "Olympus Stylus SP-820UZ": 6.16, + "Olympus Stylus Tough 6000": 6.16, + "Olympus Stylus Tough 6010": 6.16, + "Olympus Stylus Tough 6020": 6.08, + "Olympus Stylus Tough 8000": 6.08, + "Olympus Stylus Tough 8010": 6.08, + "Olympus Stylus Tough-3000": 6.08, + "Olympus Stylus Verve": 5.75, + "Olympus Stylus Verve S": 5.75, + "Olympus Stylus XZ-10": 6.16, + "Olympus T-10": 6.16, + "Olympus T-100": 6.16, + "Olympus T-110": 6.16, + "Olympus TG-310": 6.16, + "Olympus TG-320": 6.16, + "Olympus TG-610": 6.16, + "Olympus TG-630 iHS": 6.16, + "Olympus TG-810": 6.16, + "Olympus TG-820 iHS": 6.16, + "Olympus TG-830 iHS": 6.16, + "Olympus TG-850 iHS": 6.16, + "Olympus Tough TG-1 iHS": 6.16, + "Olympus Tough TG-2 iHS": 6.16, + "Olympus Tough TG-3": 6.16, + "Olympus Tough TG-620": 6.16, + "Olympus VG-110": 6.16, + "Olympus VG-120": 6.16, + "Olympus VG-130": 6.16, + "Olympus VG-145": 6.16, + "Olympus VG-150": 6.16, + "Olympus VG-160": 6.16, + "Olympus VG-165": 6.16, + "Olympus VG-170": 6.16, + "Olympus VG-180": 6.16, + "Olympus VG-190": 6.16, + "Olympus VH-210": 6.16, + "Olympus VH-410": 6.16, + "Olympus VH-510": 6.16, + "Olympus VH-515": 6.16, + "Olympus VH-520": 6.16, + "Olympus VR-310": 6.16, + "Olympus VR-320": 6.16, + "Olympus VR-330": 6.16, + "Olympus VR-340": 6.16, + "Olympus VR-350": 6.16, + "Olympus VR-360": 6.16, + "Olympus VR-370": 6.16, + "Olympus X-15": 6.03, + "Olympus X-775": 5.75, + "Olympus X-785": 5.75, + "Olympus X-905": 6.16, + "Olympus X-920": 6.16, + 
"Olympus XZ-1": 7.85, + "Olympus XZ-2 iHS": 7.53, + "Olympus mju 400 Digital Ferrari": 5.75, + "Olympus mju 800 black": 7.11, + "Olympus mju mini Digital": 5.75, + "Olympus mju mini Digital S": 5.75, + "Panasonic Lumix DMC-LZ20": 6.08, + "Panasonic D-snap SV-AS10": 4.5, + "Panasonic D-snap SV-AS3": 4.5, + "Panasonic D-snap SV-AS30": 4.5, + "Panasonic Lumix DMC-3D1": 6.16, + "Panasonic Lumix DMC-F1": 5.75, + "Panasonic Lumix DMC-F3": 6.08, + "Panasonic Lumix DMC-F5": 6.08, + "Panasonic Lumix DMC-F7": 5.33, + "Panasonic Lumix DMC-FH1": 6.08, + "Panasonic Lumix DMC-FH10": 6.08, + "Panasonic Lumix DMC-FH2": 6.08, + "Panasonic Lumix DMC-FH20": 6.08, + "Panasonic Lumix DMC-FH22": 6.08, + "Panasonic Lumix DMC-FH25": 6.16, + "Panasonic Lumix DMC-FH27": 6.16, + "Panasonic Lumix DMC-FH3": 6.08, + "Panasonic Lumix DMC-FH4": 6.08, + "Panasonic Lumix DMC-FH5": 6.08, + "Panasonic Lumix DMC-FH6": 6.08, + "Panasonic Lumix DMC-FH7": 6.08, + "Panasonic Lumix DMC-FH8": 6.08, + "Panasonic Lumix DMC-FP1": 6.08, + "Panasonic Lumix DMC-FP2": 6.08, + "Panasonic Lumix DMC-FP3": 6.08, + "Panasonic Lumix DMC-FP5": 6.08, + "Panasonic Lumix DMC-FP7": 6.08, + "Panasonic Lumix DMC-FP8": 6.08, + "Panasonic Lumix DMC-FS10": 6.08, + "Panasonic Lumix DMC-FS11": 6.08, + "Panasonic Lumix DMC-FS12": 6.08, + "Panasonic Lumix DMC-FS15": 6.08, + "Panasonic Lumix DMC-FS16": 6.08, + "Panasonic Lumix DMC-FS18": 6.08, + "Panasonic Lumix DMC-FS2": 5.75, + "Panasonic Lumix DMC-FS20": 6.08, + "Panasonic Lumix DMC-FS22": 6.08, + "Panasonic Lumix DMC-FS25": 6.08, + "Panasonic Lumix DMC-FS28": 6.08, + "Panasonic Lumix DMC-FS3": 5.75, + "Panasonic Lumix DMC-FS30": 6.08, + "Panasonic Lumix DMC-FS33": 6.08, + "Panasonic Lumix DMC-FS35": 6.16, + "Panasonic Lumix DMC-FS37": 6.16, + "Panasonic Lumix DMC-FS40": 6.08, + "Panasonic Lumix DMC-FS42": 5.75, + "Panasonic Lumix DMC-FS45": 6.08, + "Panasonic Lumix DMC-FS5": 6.08, + "Panasonic Lumix DMC-FS6": 5.75, + "Panasonic Lumix DMC-FS62": 5.75, + "Panasonic Lumix DMC-FS7": 5.75, + "Panasonic Lumix DMC-FT1": 6.08, + "Panasonic Lumix DMC-FT10": 6.08, + "Panasonic Lumix DMC-FT2": 6.08, + "Panasonic Lumix DMC-FT20": 6.08, + "Panasonic Lumix DMC-FT3": 6.08, + "Panasonic Lumix DMC-FT4": 6.08, + "Panasonic Lumix DMC-FX01": 5.75, + "Panasonic Lumix DMC-FX07": 5.75, + "Panasonic Lumix DMC-FX1": 5.75, + "Panasonic Lumix DMC-FX10": 5.75, + "Panasonic Lumix DMC-FX100": 7.44, + "Panasonic Lumix DMC-FX12": 5.75, + "Panasonic Lumix DMC-FX150": 7.44, + "Panasonic Lumix DMC-FX2": 5.75, + "Panasonic Lumix DMC-FX3": 5.75, + "Panasonic Lumix DMC-FX30": 5.75, + "Panasonic Lumix DMC-FX33": 5.75, + "Panasonic Lumix DMC-FX35": 6.08, + "Panasonic Lumix DMC-FX37": 6.08, + "Panasonic Lumix DMC-FX40": 6.08, + "Panasonic Lumix DMC-FX48": 6.08, + "Panasonic Lumix DMC-FX5": 5.75, + "Panasonic Lumix DMC-FX50": 5.75, + "Panasonic Lumix DMC-FX500": 6.08, + "Panasonic Lumix DMC-FX55": 5.75, + "Panasonic Lumix DMC-FX550": 6.08, + "Panasonic Lumix DMC-FX580": 6.08, + "Panasonic Lumix DMC-FX60": 6.08, + "Panasonic Lumix DMC-FX65": 6.08, + "Panasonic Lumix DMC-FX66": 6.08, + "Panasonic Lumix DMC-FX68": 6.08, + "Panasonic Lumix DMC-FX7": 5.75, + "Panasonic Lumix DMC-FX70": 6.08, + "Panasonic Lumix DMC-FX700": 6.08, + "Panasonic Lumix DMC-FX75": 6.08, + "Panasonic Lumix DMC-FX77": 6.08, + "Panasonic Lumix DMC-FX78": 6.08, + "Panasonic Lumix DMC-FX8": 5.75, + "Panasonic Lumix DMC-FX80": 6.08, + "Panasonic Lumix DMC-FX9": 5.75, + "Panasonic Lumix DMC-FX90": 6.08, + "Panasonic Lumix DMC-FZ1": 4.5, + "Panasonic Lumix DMC-FZ10": 5.75, + 
"Panasonic Lumix DMC-FZ100": 6.08, + "Panasonic Lumix DMC-FZ1000": 13.2, + "Panasonic Lumix DMC-FZ15": 5.75, + "Panasonic Lumix DMC-FZ150": 6.08, + "Panasonic Lumix DMC-FZ18": 5.75, + "Panasonic Lumix DMC-FZ2": 4.5, + "Panasonic Lumix DMC-FZ20": 5.75, + "Panasonic Lumix DMC-FZ200": 6.16, + "Panasonic Lumix DMC-FZ28": 6.08, + "Panasonic Lumix DMC-FZ3": 4.5, + "Panasonic Lumix DMC-FZ30": 7.11, + "Panasonic Lumix DMC-FZ35": 6.08, + "Panasonic Lumix DMC-FZ38": 6.08, + "Panasonic Lumix DMC-FZ4": 5.75, + "Panasonic Lumix DMC-FZ40": 6.08, + "Panasonic Lumix DMC-FZ45": 6.08, + "Panasonic Lumix DMC-FZ47": 6.08, + "Panasonic Lumix DMC-FZ48": 6.08, + "Panasonic Lumix DMC-FZ5": 5.75, + "Panasonic Lumix DMC-FZ50": 7.11, + "Panasonic Lumix DMC-FZ60": 6.08, + "Panasonic Lumix DMC-FZ7": 5.75, + "Panasonic Lumix DMC-FZ70": 6.16, + "Panasonic Lumix DMC-FZ8": 5.75, + "Panasonic Lumix DMC-G1": 17.3, + "Panasonic Lumix DMC-G10": 17.3, + "Panasonic Lumix DMC-G2": 17.3, + "Panasonic Lumix DMC-G3": 17.3, + "Panasonic Lumix DMC-G5": 17.3, + "Panasonic Lumix DMC-G6": 17.3, + "Panasonic Lumix DMC-GF1": 17.3, + "Panasonic Lumix DMC-GF2": 17.3, + "Panasonic Lumix DMC-GF3": 17.3, + "Panasonic Lumix DMC-GF5": 17.3, + "Panasonic Lumix DMC-GF6": 17.3, + "Panasonic Lumix DMC-GH1": 17.3, + "Panasonic Lumix DMC-GH2": 17.3, + "Panasonic Lumix DMC-GH3": 17.3, + "Panasonic Lumix DMC-GH4": 17.3, + "Panasonic Lumix DMC-GM1": 17.3, + "Panasonic Lumix DMC-GM5": 17.3, + "Panasonic Lumix DMC-GX1": 17.3, + "Panasonic Lumix DMC-GX7": 17.3, + "Panasonic Lumix DMC-L1": 17.3, + "Panasonic Lumix DMC-L10": 17.3, + "Panasonic Lumix DMC-LC1": 8.8, + "Panasonic Lumix DMC-LC20": 5.33, + "Panasonic Lumix DMC-LC33": 5.75, + "Panasonic Lumix DMC-LC40": 7.53, + "Panasonic Lumix DMC-LC43": 5.75, + "Panasonic Lumix DMC-LC5": 7.53, + "Panasonic Lumix DMC-LC50": 5.75, + "Panasonic Lumix DMC-LC70": 5.75, + "Panasonic Lumix DMC-LC80": 5.75, + "Panasonic Lumix DMC-LF1": 7.53, + "Panasonic Lumix DMC-LS1": 5.75, + "Panasonic Lumix DMC-LS2": 5.75, + "Panasonic Lumix DMC-LS3": 5.75, + "Panasonic Lumix DMC-LS5": 6.08, + "Panasonic Lumix DMC-LS6": 6.08, + "Panasonic Lumix DMC-LS60": 5.75, + "Panasonic Lumix DMC-LS75": 5.75, + "Panasonic Lumix DMC-LS80": 5.75, + "Panasonic Lumix DMC-LS85": 5.75, + "Panasonic Lumix DMC-LX1": 7.76, + "Panasonic Lumix DMC-LX100": 17.3, + "Panasonic Lumix DMC-LX2": 7.76, + "Panasonic Lumix DMC-LX3": 7.85, + "Panasonic Lumix DMC-LX5": 7.85, + "Panasonic Lumix DMC-LX7": 7.53, + "Panasonic Lumix DMC-LZ1": 5.75, + "Panasonic Lumix DMC-LZ10": 6.08, + "Panasonic Lumix DMC-LZ2": 5.75, + "Panasonic Lumix DMC-LZ3": 5.75, + "Panasonic Lumix DMC-LZ30": 6.16, + "Panasonic Lumix DMC-LZ40": 6.16, + "Panasonic Lumix DMC-LZ5": 5.75, + "Panasonic Lumix DMC-LZ6": 5.75, + "Panasonic Lumix DMC-LZ7": 5.75, + "Panasonic Lumix DMC-LZ8": 5.75, + "Panasonic Lumix DMC-S1": 6.08, + "Panasonic Lumix DMC-S2": 6.08, + "Panasonic Lumix DMC-S3": 6.08, + "Panasonic Lumix DMC-S5": 6.08, + "Panasonic Lumix DMC-SZ1": 6.08, + "Panasonic Lumix DMC-SZ3": 6.08, + "Panasonic Lumix DMC-SZ5": 6.08, + "Panasonic Lumix DMC-SZ7": 6.08, + "Panasonic Lumix DMC-SZ8": 6.08, + "Panasonic Lumix DMC-SZ9": 6.08, + "Panasonic Lumix DMC-TS1": 6.08, + "Panasonic Lumix DMC-TS10": 6.08, + "Panasonic Lumix DMC-TS2": 6.08, + "Panasonic Lumix DMC-TS20": 6.08, + "Panasonic Lumix DMC-TS25": 6.08, + "Panasonic Lumix DMC-TS3": 6.08, + "Panasonic Lumix DMC-TS4": 6.08, + "Panasonic Lumix DMC-TS5": 6.08, + "Panasonic Lumix DMC-TZ1": 5.75, + "Panasonic Lumix DMC-TZ10": 6.08, + "Panasonic Lumix 
DMC-TZ18": 6.08, + "Panasonic Lumix DMC-TZ2": 6.08, + "Panasonic Lumix DMC-TZ20": 6.08, + "Panasonic Lumix DMC-TZ22": 6.08, + "Panasonic Lumix DMC-TZ25": 6.16, + "Panasonic Lumix DMC-TZ3": 6.08, + "Panasonic Lumix DMC-TZ30": 6.08, + "Panasonic Lumix DMC-TZ31": 6.08, + "Panasonic Lumix DMC-TZ4": 5.75, + "Panasonic Lumix DMC-TZ5": 6.08, + "Panasonic Lumix DMC-TZ50": 6.08, + "Panasonic Lumix DMC-TZ6": 5.75, + "Panasonic Lumix DMC-TZ7": 6.08, + "Panasonic Lumix DMC-XS1": 6.08, + "Panasonic Lumix DMC-XS3": 6.08, + "Panasonic Lumix DMC-ZR1": 6.08, + "Panasonic Lumix DMC-ZR3": 6.08, + "Panasonic Lumix DMC-ZS1": 5.75, + "Panasonic Lumix DMC-ZS10": 6.08, + "Panasonic Lumix DMC-ZS15": 6.16, + "Panasonic Lumix DMC-ZS20": 6.08, + "Panasonic Lumix DMC-ZS25": 6.08, + "Panasonic Lumix DMC-ZS3": 6.08, + "Panasonic Lumix DMC-ZS30": 6.16, + "Panasonic Lumix DMC-ZS35 / TZ55": 6.08, + "Panasonic Lumix DMC-ZS40 / TZ60": 6.16, + "Panasonic Lumix DMC-ZS5": 6.08, + "Panasonic Lumix DMC-ZS7": 6.08, + "Panasonic Lumix DMC-ZS8": 6.08, + "Panasonic Lumix DMC-ZX1": 6.08, + "Panasonic Lumix DMC-ZX3": 6.08, + "Panasonic PV DC3000": 7.11, + "Pentax *ist D": 23.5, + "Pentax *ist DL": 23.5, + "Pentax *ist DL2": 23.5, + "Pentax *ist DS": 23.5, + "Pentax *ist DS2": 23.5, + "Pentax 645D": 44.0, + "Pentax 645Z": 44.0, + "Pentax EI-100": 4.5, + "Pentax EI-200": 5.33, + "Pentax EI-2000": 8.8, + "Pentax Efina": 6.16, + "Pentax K-01": 23.7, + "Pentax K-3": 23.5, + "Pentax K-30": 23.7, + "Pentax K-5": 23.6, + "Pentax K-5 II": 23.7, + "Pentax K-50": 23.7, + "Pentax K-500": 23.7, + "Pentax K-7": 23.4, + "Pentax K-S1": 23.5, + "Pentax K-m": 23.5, + "Pentax K-r": 23.6, + "Pentax K-x": 23.6, + "Pentax K100D": 23.5, + "Pentax K100D Super": 23.5, + "Pentax K10D": 23.5, + "Pentax K110D": 23.7, + "Pentax K200D": 23.5, + "Pentax K20D": 23.4, + "Pentax MX-1": 7.53, + "Pentax Optio 230": 5.33, + "Pentax Optio 30": 5.33, + "Pentax Optio 330": 7.11, + "Pentax Optio 330GS": 5.33, + "Pentax Optio 330RS": 7.11, + "Pentax Optio 33L": 5.33, + "Pentax Optio 33LF": 5.33, + "Pentax Optio 33WR": 5.33, + "Pentax Optio 430": 7.11, + "Pentax Optio 430RS": 7.11, + "Pentax Optio 43WR": 5.33, + "Pentax Optio 450": 7.11, + "Pentax Optio 50": 5.75, + "Pentax Optio 50L": 5.75, + "Pentax Optio 550": 7.11, + "Pentax Optio 555": 7.11, + "Pentax Optio 60": 7.11, + "Pentax Optio 750Z": 7.11, + "Pentax Optio A10": 7.11, + "Pentax Optio A20": 5.75, + "Pentax Optio A30": 7.11, + "Pentax Optio A40": 7.53, + "Pentax Optio E10": 5.75, + "Pentax Optio E20": 5.75, + "Pentax Optio E25": 5.75, + "Pentax Optio E30": 5.75, + "Pentax Optio E40": 5.75, + "Pentax Optio E50": 5.75, + "Pentax Optio E60": 6.08, + "Pentax Optio E70": 6.16, + "Pentax Optio E70L": 6.16, + "Pentax Optio E75": 6.08, + "Pentax Optio E80": 6.08, + "Pentax Optio E85": 6.16, + "Pentax Optio E90": 6.16, + "Pentax Optio H90": 6.16, + "Pentax Optio I-10": 6.16, + "Pentax Optio L20": 5.75, + "Pentax Optio L50": 6.03, + "Pentax Optio LS1000": 6.16, + "Pentax Optio LS1100": 6.08, + "Pentax Optio LS465": 23.7, + "Pentax Optio M10": 5.75, + "Pentax Optio M20": 5.75, + "Pentax Optio M30": 5.75, + "Pentax Optio M40": 6.03, + "Pentax Optio M50": 6.03, + "Pentax Optio M60": 6.16, + "Pentax Optio M85": 6.16, + "Pentax Optio M90": 6.08, + "Pentax Optio MX": 5.33, + "Pentax Optio MX4": 5.33, + "Pentax Optio P70": 6.16, + "Pentax Optio P80": 6.16, + "Pentax Optio RS1000": 6.16, + "Pentax Optio RS1500": 6.08, + "Pentax Optio RZ10": 6.08, + "Pentax Optio RZ18": 6.08, + "Pentax Optio S": 5.75, + "Pentax Optio S1": 6.16, + "Pentax 
Optio S10": 7.11, + "Pentax Optio S12": 7.53, + "Pentax Optio S30": 5.33, + "Pentax Optio S4": 5.75, + "Pentax Optio S40": 5.75, + "Pentax Optio S45": 5.75, + "Pentax Optio S4i": 5.75, + "Pentax Optio S50": 5.75, + "Pentax Optio S55": 5.75, + "Pentax Optio S5i": 5.75, + "Pentax Optio S5n": 5.75, + "Pentax Optio S5z": 5.75, + "Pentax Optio S6": 5.75, + "Pentax Optio S60": 5.75, + "Pentax Optio S7": 5.75, + "Pentax Optio SV": 5.75, + "Pentax Optio SVi": 5.75, + "Pentax Optio T10": 5.75, + "Pentax Optio T20": 5.75, + "Pentax Optio T30": 5.75, + "Pentax Optio V10": 6.03, + "Pentax Optio V20": 6.03, + "Pentax Optio VS20": 6.08, + "Pentax Optio W10": 5.75, + "Pentax Optio W20": 5.75, + "Pentax Optio W30": 5.75, + "Pentax Optio W60": 6.16, + "Pentax Optio W80": 6.16, + "Pentax Optio W90": 6.16, + "Pentax Optio WG-1": 6.16, + "Pentax Optio WG-1 GPS": 6.16, + "Pentax Optio WG-2": 6.16, + "Pentax Optio WG-2 GPS": 6.16, + "Pentax Optio WP": 5.75, + "Pentax Optio WPi": 5.75, + "Pentax Optio WS80": 6.08, + "Pentax Optio X": 5.75, + "Pentax Optio Z10": 5.75, + "Pentax Q": 6.16, + "Pentax Q-S1": 7.53, + "Pentax Q10": 6.16, + "Pentax Q7": 7.53, + "Pentax WG-10": 6.16, + "Pentax WG-3": 6.16, + "Pentax X-5": 6.08, + "Pentax X70": 6.08, + "Pentax X90": 6.08, + "Pentax XG-1": 6.08, + "Phase One iXM-RS100F": 53.4, + "Praktica DC 20": 6.4, + "Praktica DC 21": 5.33, + "Praktica DC 22": 5.33, + "Praktica DC 32": 5.33, + "Praktica DC 34": 5.33, + "Praktica DC 42": 5.75, + "Praktica DC 44": 5.75, + "Praktica DC 50": 5.75, + "Praktica DC 52": 5.75, + "Praktica DC 60": 5.75, + "Praktica DC Slim 2": 6.4, + "Praktica DC Slim 5": 7.11, + "Praktica DC440": 5.75, + "Praktica DCZ 1.3": 6.4, + "Praktica DCZ 10.1": 7.11, + "Praktica DCZ 10.2": 6.16, + "Praktica DCZ 10.3": 6.08, + "Praktica DCZ 10.4": 6.16, + "Praktica DCZ 12.1": 6.16, + "Praktica DCZ 12.Z4": 6.16, + "Praktica DCZ 14.1": 6.16, + "Praktica DCZ 14.2": 6.16, + "Praktica DCZ 2.0": 4.8, + "Praktica DCZ 2.1": 4.8, + "Praktica DCZ 2.1 S": 4.5, + "Praktica DCZ 2.2": 5.33, + "Praktica DCZ 2.2 S": 6.4, + "Praktica DCZ 3.0": 6.4, + "Praktica DCZ 3.2": 7.11, + "Praktica DCZ 3.2D": 6.4, + "Praktica DCZ 3.2S": 6.4, + "Praktica DCZ 3.3": 7.11, + "Praktica DCZ 3.4": 5.75, + "Praktica DCZ 3.5": 5.75, + "Praktica DCZ 4.1": 7.11, + "Praktica DCZ 4.2": 7.11, + "Praktica DCZ 4.3": 7.11, + "Praktica DCZ 4.4": 5.75, + "Praktica DCZ 5.1": 7.11, + "Praktica DCZ 5.2": 7.11, + "Praktica DCZ 5.3": 5.75, + "Praktica DCZ 5.4": 5.75, + "Praktica DCZ 5.5": 5.75, + "Praktica DCZ 5.8": 5.75, + "Praktica DCZ 6.1": 5.75, + "Praktica DCZ 6.2": 5.75, + "Praktica DCZ 6.3": 5.75, + "Praktica DCZ 6.8": 5.75, + "Praktica DCZ 7.1": 5.75, + "Praktica DCZ 7.2": 5.75, + "Praktica DCZ 7.3": 5.75, + "Praktica DCZ 7.4": 5.75, + "Praktica DCZ 8.1": 5.75, + "Praktica DCZ 8.2": 5.75, + "Praktica DCZ 8.3": 5.75, + "Praktica DMMC": 4.8, + "Praktica DMMC 4": 4.8, + "Praktica DVC 6.1": 5.75, + "Praktica Digi 3": 6.4, + "Praktica Digi 3 LM": 6.4, + "Praktica Digi 30": 6.4, + "Praktica Digicam 3": 6.4, + "Praktica Dpix 1000z": 6.16, + "Praktica Dpix 1100z": 5.75, + "Praktica Dpix 1220z": 6.08, + "Praktica Dpix 3000": 6.4, + "Praktica Dpix 3200": 4.8, + "Praktica Dpix 3300": 4.8, + "Praktica Dpix 5000 WP": 4.5, + "Praktica Dpix 5100": 5.75, + "Praktica Dpix 510Z": 5.75, + "Praktica Dpix 5200": 5.75, + "Praktica Dpix 530Z": 5.75, + "Praktica Dpix 740z": 5.75, + "Praktica Dpix 750z": 5.75, + "Praktica Dpix 810z": 5.75, + "Praktica Dpix 820z": 5.75, + "Praktica Dpix 9000": 6.16, + "Praktica Dpix 910z": 6.4, + "Praktica 
Exakta DC 4200": 7.11, + "Praktica G2.0": 6.4, + "Praktica G3.2": 6.4, + "Praktica Luxmedia 10 TS": 6.16, + "Praktica Luxmedia 10 X3": 7.11, + "Praktica Luxmedia 10 XS": 5.75, + "Praktica Luxmedia 10-03": 6.16, + "Praktica Luxmedia 10-23": 6.16, + "Praktica Luxmedia 12 HD": 7.44, + "Praktica Luxmedia 12 TS": 6.16, + "Praktica Luxmedia 12 XS": 5.75, + "Praktica Luxmedia 12-03": 5.75, + "Praktica Luxmedia 12-04": 6.16, + "Praktica Luxmedia 12-23": 6.16, + "Praktica Luxmedia 12-Z4": 6.16, + "Praktica Luxmedia 12-Z4TS": 6.16, + "Praktica Luxmedia 12-Z5": 6.16, + "Praktica Luxmedia 14-Z50S": 6.16, + "Praktica Luxmedia 14-Z51": 6.16, + "Praktica Luxmedia 14-Z80S": 6.16, + "Praktica Luxmedia 16-Z12S": 6.16, + "Praktica Luxmedia 16-Z21C": 6.16, + "Praktica Luxmedia 16-Z21S": 6.16, + "Praktica Luxmedia 16-Z24S": 6.16, + "Praktica Luxmedia 16-Z51": 6.16, + "Praktica Luxmedia 16-Z52": 6.16, + "Praktica Luxmedia 18-Z36C": 6.16, + "Praktica Luxmedia 4008": 5.75, + "Praktica Luxmedia 5003": 7.11, + "Praktica Luxmedia 5008": 5.75, + "Praktica Luxmedia 5103": 7.11, + "Praktica Luxmedia 5203": 5.75, + "Praktica Luxmedia 5303": 5.75, + "Praktica Luxmedia 6103": 7.11, + "Praktica Luxmedia 6105": 5.75, + "Praktica Luxmedia 6203": 5.75, + "Praktica Luxmedia 6403": 5.75, + "Praktica Luxmedia 6503": 5.75, + "Praktica Luxmedia 7103": 5.75, + "Praktica Luxmedia 7105": 5.75, + "Praktica Luxmedia 7203": 5.75, + "Praktica Luxmedia 7303": 5.75, + "Praktica Luxmedia 7403": 5.75, + "Praktica Luxmedia 8003": 7.11, + "Praktica Luxmedia 8203": 5.75, + "Praktica Luxmedia 8213": 5.75, + "Praktica Luxmedia 8303": 5.75, + "Praktica Luxmedia 8403": 5.75, + "Praktica Luxmedia 8503": 5.75, + "Praktica Mini": 6.4, + "Praktica V2.1": 6.4, + "Praktica V3.2": 6.4, + "Ricoh CX1": 6.16, + "Ricoh CX2": 6.16, + "Ricoh CX3": 6.16, + "Ricoh CX4": 6.16, + "Ricoh CX5": 6.16, + "Ricoh CX6": 6.16, + "Ricoh Caplio 400G Wide": 5.33, + "Ricoh Caplio 500G": 7.11, + "Ricoh Caplio 500G Wide": 7.11, + "Ricoh Caplio 500SE": 7.11, + "Ricoh Caplio G3": 5.33, + "Ricoh Caplio G3s": 5.33, + "Ricoh Caplio GX": 7.11, + "Ricoh Caplio GX100": 7.31, + "Ricoh Caplio GX200": 7.53, + "Ricoh Caplio GX8": 7.11, + "Ricoh Caplio R1": 7.11, + "Ricoh Caplio R1V": 5.75, + "Ricoh Caplio R2": 5.75, + "Ricoh Caplio R2S": 5.75, + "Ricoh Caplio R3": 5.75, + "Ricoh Caplio R30": 5.75, + "Ricoh Caplio R4": 5.75, + "Ricoh Caplio R40": 5.75, + "Ricoh Caplio R5": 5.75, + "Ricoh Caplio R6": 5.75, + "Ricoh Caplio R7": 5.75, + "Ricoh Caplio R8": 6.16, + "Ricoh Caplio RR1": 7.11, + "Ricoh Caplio RR10": 5.33, + "Ricoh Caplio RR120": 4.5, + "Ricoh Caplio RR230": 4.5, + "Ricoh Caplio RR30": 5.33, + "Ricoh Caplio RR330": 5.75, + "Ricoh Caplio RR530": 5.75, + "Ricoh Caplio RR630": 7.11, + "Ricoh Caplio RR660": 5.75, + "Ricoh Caplio RR750": 5.75, + "Ricoh Caplio RR770": 5.75, + "Ricoh Caplio RX": 5.33, + "Ricoh Caplio RZ1": 5.75, + "Ricoh G600": 6.16, + "Ricoh G700": 6.16, + "Ricoh G700SE": 6.16, + "Ricoh G800": 6.16, + "Ricoh GR": 23.6, + "Ricoh GR Digital": 7.11, + "Ricoh GR Digital 3": 7.53, + "Ricoh GR Digital 4": 7.53, + "Ricoh GR Digital II": 7.31, + "Ricoh GX200": 7.53, + "Ricoh GXR A12 50mm F2.5 Macro": 23.6, + "Ricoh GXR A16 24-85mm F3.5-5.5": 23.6, + "Ricoh GXR GR Lens A12 28mm F2.5": 23.6, + "Ricoh GXR Mount A12": 23.6, + "Ricoh GXR P10 28-300mm F3.5-5.6 VC": 6.16, + "Ricoh GXR S10 24-72mm F2.5-4.4 VC": 7.53, + "Ricoh HZ15": 6.16, + "Ricoh PX": 6.16, + "Ricoh R10": 6.16, + "Ricoh R50": 6.08, + "Ricoh R8": 6.16, + "Ricoh RDC-200G": 6.4, + "Ricoh RDC-4300": 4.8, + "Ricoh RDC-5000": 
5.33, + "Ricoh RDC-5300": 5.33, + "Ricoh RDC-6000": 6.4, + "Ricoh RDC-7": 7.11, + "Ricoh RDC-i500": 7.11, + "Ricoh RDC-i700": 7.11, + "Ricoh WG-20": 6.16, + "Ricoh WG-4": 6.16, + "Ricoh WG-M1": 6.16, + "Rollei Compactline 100": 6.16, + "Rollei Compactline 101": 5.75, + "Rollei Compactline 102": 5.75, + "Rollei Compactline 103": 6.08, + "Rollei Compactline 110": 5.75, + "Rollei Compactline 130": 5.75, + "Rollei Compactline 150": 6.08, + "Rollei Compactline 200": 6.16, + "Rollei Compactline 202": 6.08, + "Rollei Compactline 203": 6.08, + "Rollei Compactline 230": 6.08, + "Rollei Compactline 302": 6.08, + "Rollei Compactline 304": 6.08, + "Rollei Compactline 312": 6.16, + "Rollei Compactline 320": 6.08, + "Rollei Compactline 350": 6.16, + "Rollei Compactline 360 TS": 6.16, + "Rollei Compactline 370 TS": 6.16, + "Rollei Compactline 390 SE": 6.16, + "Rollei Compactline 412": 6.08, + "Rollei Compactline 415": 6.08, + "Rollei Compactline 425": 6.16, + "Rollei Compactline 50": 6.16, + "Rollei Compactline 52": 6.16, + "Rollei Compactline 55": 6.16, + "Rollei Compactline 80": 5.75, + "Rollei Compactline 81": 5.75, + "Rollei Compactline 90": 6.16, + "Rollei Flexline 100": 5.75, + "Rollei Flexline 100 iT": 5.75, + "Rollei Flexline 140": 6.08, + "Rollei Flexline 200": 6.08, + "Rollei Flexline 202": 6.16, + "Rollei Flexline 250": 6.08, + "Rollei Kids 100": 8.8, + "Rollei Powerflex 240 HD": 6.16, + "Rollei Powerflex 360 Full HD": 6.16, + "Rollei Powerflex 3D": 6.08, + "Rollei Powerflex 400": 6.08, + "Rollei Powerflex 440": 6.08, + "Rollei Powerflex 450": 6.08, + "Rollei Powerflex 455": 6.08, + "Rollei Powerflex 460": 6.08, + "Rollei Powerflex 470": 6.08, + "Rollei Powerflex 500": 6.08, + "Rollei Powerflex 600": 6.16, + "Rollei Powerflex 610 HD": 6.16, + "Rollei Powerflex 700 Full HD": 6.08, + "Rollei Powerflex 800": 6.08, + "Rollei Powerflex 820": 6.08, + "Rollei Prego da3": 5.33, + "Rollei Prego da4": 5.75, + "Rollei Prego da5": 5.75, + "Rollei Prego da6": 7.11, + "Rollei Prego dp4200": 5.75, + "Rollei Prego dp5200": 5.75, + "Rollei Prego dp5300": 7.11, + "Rollei Prego dp5500": 5.75, + "Rollei Prego dp6000": 7.11, + "Rollei Prego dp6200": 7.11, + "Rollei Prego dp6300": 7.11, + "Rollei Prego dp8300": 7.11, + "Rollei RCP-10325X": 7.11, + "Rollei RCP-5324": 5.75, + "Rollei RCP-6324": 5.75, + "Rollei RCP-7324": 5.75, + "Rollei RCP-7325XS": 5.75, + "Rollei RCP-7330X": 5.75, + "Rollei RCP-7430XW": 5.75, + "Rollei RCP-8325": 5.75, + "Rollei RCP-8325X": 7.11, + "Rollei RCP-8325XS": 5.75, + "Rollei RCP-8330X": 5.75, + "Rollei RCP-8427XW": 5.75, + "Rollei RCP-8527X": 5.75, + "Rollei RCP-S10": 6.16, + "Rollei RCP-S8": 5.75, + "Rollei Sportsline 50": 6.16, + "Rollei Sportsline 60 Camouflage": 6.16, + "Rollei Sportsline 62": 6.16, + "Rollei Sportsline 90": 6.16, + "Rollei Sportsline 99": 6.16, + "Rollei X-8": 5.75, + "Rollei X-8 Compact": 5.75, + "Rollei X-8 Sports": 5.75, + "Rollei XS-10": 5.75, + "Rollei XS-10 inTouch": 6.16, + "Rollei XS-8": 5.75, + "Rollei XS-8 Crystal": 5.75, + "Rollei d20 motion": 7.11, + "Rollei d210 motion": 4.5, + "Rollei d23 com": 7.53, + "Rollei d33 com": 7.11, + "Rollei d330 motion": 5.33, + "Rollei d41 com": 7.11, + "Rollei d530 flex": 8.8, + "Rollei da10": 5.75, + "Rollei da1325 Prego": 5.75, + "Rollei da5324": 5.75, + "Rollei da5325 Prego": 5.75, + "Rollei da6324": 5.75, + "Rollei da7325 Prego": 5.75, + "Rollei da8324": 5.75, + "Rollei dc 3100": 5.33, + "Rollei dcx 310": 7.11, + "Rollei dcx 400": 7.11, + "Rollei dk 3000": 5.33, + "Rollei dk4010": 5.33, + "Rollei dp 300": 5.33, + 
"Rollei dp 3210": 5.33, + "Rollei dp6500": 7.11, + "Rollei dpx 310": 5.33, + "Rollei dr 5": 5.75, + "Rollei dr 5100": 7.11, + "Rollei ds6": 5.75, + "Rollei dsx 410": 5.75, + "Rollei dt 3200": 5.75, + "Rollei dt 4000": 5.75, + "Rollei dt 4200": 5.75, + "Rollei dt6 Tribute": 5.75, + "Rollei dx63": 5.75, + "Samsung AQ100": 6.08, + "Samsung CL5": 5.75, + "Samsung CL65": 6.08, + "Samsung CL80": 6.16, + "Samsung D75": 5.75, + "Samsung D830": 7.11, + "Samsung D85": 5.75, + "Samsung D860": 5.75, + "Samsung DV100": 6.16, + "Samsung DV150F": 6.16, + "Samsung DV300F": 6.16, + "Samsung Digimax 101": 6.4, + "Samsung Digimax 130": 4.5, + "Samsung Digimax 200": 5.33, + "Samsung Digimax 201": 4.5, + "Samsung Digimax 202": 6.4, + "Samsung Digimax 210 SE": 5.33, + "Samsung Digimax 220 SE": 5.33, + "Samsung Digimax 230": 5.33, + "Samsung Digimax 240": 4.5, + "Samsung Digimax 250": 4.5, + "Samsung Digimax 301": 5.33, + "Samsung Digimax 330": 7.53, + "Samsung Digimax 340": 7.11, + "Samsung Digimax 35 MP3": 4.8, + "Samsung Digimax 350SE": 7.11, + "Samsung Digimax 360": 7.11, + "Samsung Digimax 370": 5.75, + "Samsung Digimax 401": 5.75, + "Samsung Digimax 410": 7.11, + "Samsung Digimax 420": 7.11, + "Samsung Digimax 430": 5.75, + "Samsung Digimax 50 duo": 4.8, + "Samsung Digimax 530": 7.11, + "Samsung Digimax A400": 5.75, + "Samsung Digimax A402": 5.75, + "Samsung Digimax A5": 7.11, + "Samsung Digimax A50": 5.75, + "Samsung Digimax A502": 5.75, + "Samsung Digimax A503": 5.75, + "Samsung Digimax A55W": 5.75, + "Samsung Digimax A6": 7.11, + "Samsung Digimax A7": 7.11, + "Samsung Digimax D103": 7.11, + "Samsung Digimax L50": 5.75, + "Samsung Digimax L55W": 5.75, + "Samsung Digimax L60": 5.75, + "Samsung Digimax L70": 5.75, + "Samsung Digimax L85": 7.11, + "Samsung Digimax S1000": 7.11, + "Samsung Digimax S500": 5.75, + "Samsung Digimax S600": 5.75, + "Samsung Digimax S700": 5.75, + "Samsung Digimax S800": 7.11, + "Samsung Digimax U-CA 3": 5.33, + "Samsung Digimax U-CA 4": 5.75, + "Samsung Digimax U-CA 401": 5.75, + "Samsung Digimax U-CA5": 5.75, + "Samsung Digimax U-CA501": 5.75, + "Samsung Digimax U-CA505": 5.75, + "Samsung Digimax V3": 7.11, + "Samsung Digimax V4": 7.11, + "Samsung Digimax V40": 7.11, + "Samsung Digimax V4000": 7.11, + "Samsung Digimax V5": 7.11, + "Samsung Digimax V50": 7.11, + "Samsung Digimax V6": 6.74, + "Samsung Digimax V600": 7.11, + "Samsung Digimax V70": 7.11, + "Samsung Digimax V700": 7.11, + "Samsung Digimax V800": 7.11, + "Samsung Digimax i5": 5.75, + "Samsung Digimax i50 MP3": 5.75, + "Samsung Digimax i6": 5.75, + "Samsung ES10": 5.75, + "Samsung ES15": 6.08, + "Samsung ES17": 6.08, + "Samsung ES20": 6.08, + "Samsung ES25": 6.08, + "Samsung ES28": 6.08, + "Samsung ES30": 6.16, + "Samsung ES50": 6.08, + "Samsung ES55": 6.08, + "Samsung ES60": 6.08, + "Samsung ES65": 6.08, + "Samsung ES70": 6.08, + "Samsung ES73": 6.08, + "Samsung ES75": 6.08, + "Samsung ES80": 6.16, + "Samsung ES90": 6.16, + "Samsung ES95": 6.16, + "Samsung EX1": 7.53, + "Samsung EX2F": 7.53, + "Samsung GX-10": 23.5, + "Samsung GX-1L": 23.5, + "Samsung GX-1S": 23.5, + "Samsung GX-20": 23.4, + "Samsung Galaxy Camera": 6.16, + "Samsung Galaxy Camera 2": 6.16, + "Samsung Galaxy NX": 23.5, + "Samsung Galaxy S2": 4.54, + "Samsung Galaxy S3": 4.54, + "Samsung Galaxy S4": 4.69, + "Samsung Galaxy S4 Zoom": 6.19, + "Samsung HZ10W": 6.08, + "Samsung HZ15W": 6.08, + "Samsung HZ25W": 6.08, + "Samsung HZ30W": 6.16, + "Samsung HZ35W": 6.16, + "Samsung HZ50W": 6.16, + "Samsung IT100": 6.08, + "Samsung L100": 5.75, + "Samsung L110": 
5.75, + "Samsung L200": 5.75, + "Samsung L201": 6.08, + "Samsung L210": 6.08, + "Samsung L301": 6.08, + "Samsung L310W": 7.44, + "Samsung L700": 5.75, + "Samsung L73": 5.75, + "Samsung L730": 5.75, + "Samsung L74": 5.75, + "Samsung L74 Wide": 5.75, + "Samsung L77": 5.75, + "Samsung L80": 7.11, + "Samsung L830": 5.75, + "Samsung L83T": 5.75, + "Samsung M100": 5.75, + "Samsung MV800": 6.16, + "Samsung Miniket VP-MS10": 5.75, + "Samsung Miniket VP-MS11": 5.75, + "Samsung Miniket VP-MS15": 5.75, + "Samsung NV10": 7.11, + "Samsung NV100 HD": 7.44, + "Samsung NV11": 7.11, + "Samsung NV15": 7.27, + "Samsung NV20": 7.44, + "Samsung NV24HD": 6.16, + "Samsung NV3": 5.75, + "Samsung NV30": 5.75, + "Samsung NV4": 5.75, + "Samsung NV40": 6.08, + "Samsung NV7 OPS": 5.75, + "Samsung NV8": 7.27, + "Samsung NV9": 6.16, + "Samsung NX mini": 13.2, + "Samsung NX1": 23.5, + "Samsung NX10": 23.4, + "Samsung NX100": 23.4, + "Samsung NX1000": 23.5, + "Samsung NX11": 23.4, + "Samsung NX1100": 23.5, + "Samsung NX20": 23.5, + "Samsung NX200": 23.5, + "Samsung NX2000": 23.5, + "Samsung NX210": 23.5, + "Samsung NX30": 23.5, + "Samsung NX300": 23.5, + "Samsung NX3000": 23.5, + "Samsung NX5": 23.4, + "Samsung PL10": 5.75, + "Samsung PL100": 6.08, + "Samsung PL120": 6.16, + "Samsung PL150": 6.08, + "Samsung PL160": 6.08, + "Samsung PL170": 6.08, + "Samsung PL20": 6.16, + "Samsung PL200": 6.16, + "Samsung PL210": 6.16, + "Samsung PL50": 6.08, + "Samsung PL51": 6.16, + "Samsung PL55": 6.08, + "Samsung PL60": 6.08, + "Samsung PL65": 6.08, + "Samsung PL70": 6.08, + "Samsung PL80": 6.08, + "Samsung PL90": 6.08, + "Samsung Pro815": 8.8, + "Samsung S1030": 7.11, + "Samsung S1050": 7.11, + "Samsung S1060": 6.08, + "Samsung S1070": 6.08, + "Samsung S630": 5.75, + "Samsung S730": 5.75, + "Samsung S750": 5.75, + "Samsung S760": 5.75, + "Samsung S830": 7.11, + "Samsung S85": 5.75, + "Samsung S850": 7.11, + "Samsung S860": 5.75, + "Samsung SDC-MS61": 5.75, + "Samsung SH100": 6.08, + "Samsung SL102": 6.08, + "Samsung SL201": 6.08, + "Samsung SL202": 6.08, + "Samsung SL30": 6.08, + "Samsung SL310W": 7.44, + "Samsung SL50": 6.08, + "Samsung SL502": 6.08, + "Samsung SL600": 6.08, + "Samsung SL605": 6.08, + "Samsung SL620": 6.08, + "Samsung SL630": 6.08, + "Samsung SL720": 6.08, + "Samsung SL820": 6.08, + "Samsung ST10": 5.75, + "Samsung ST100": 6.16, + "Samsung ST1000": 6.16, + "Samsung ST150F": 6.16, + "Samsung ST200F": 6.16, + "Samsung ST30": 4.8, + "Samsung ST45": 6.08, + "Samsung ST50": 6.08, + "Samsung ST500": 6.16, + "Samsung ST5000": 6.16, + "Samsung ST550": 6.08, + "Samsung ST5500": 6.16, + "Samsung ST60": 6.16, + "Samsung ST600": 6.08, + "Samsung ST65": 6.16, + "Samsung ST6500": 6.08, + "Samsung ST66": 6.16, + "Samsung ST70": 6.16, + "Samsung ST700": 6.16, + "Samsung ST72": 6.16, + "Samsung ST76": 6.16, + "Samsung ST77": 6.16, + "Samsung ST80": 6.08, + "Samsung ST88": 6.16, + "Samsung ST90": 6.16, + "Samsung ST93": 6.16, + "Samsung ST95": 6.16, + "Samsung ST96": 6.16, + "Samsung TL100": 6.08, + "Samsung TL105": 6.16, + "Samsung TL110": 6.16, + "Samsung TL205": 6.08, + "Samsung TL210": 6.08, + "Samsung TL220": 6.08, + "Samsung TL225": 6.08, + "Samsung TL240": 6.16, + "Samsung TL320": 6.08, + "Samsung TL34HD": 7.44, + "Samsung TL350": 6.08, + "Samsung TL500": 7.53, + "Samsung TL9": 6.16, + "Samsung WB100": 6.16, + "Samsung WB1000": 6.08, + "Samsung WB110": 6.16, + "Samsung WB1100F": 6.16, + "Samsung WB150F": 6.16, + "Samsung WB2000": 6.08, + "Samsung WB200F": 6.16, + "Samsung WB210": 6.16, + "Samsung WB2100": 6.16, + "Samsung 
WB2200F": 6.16, + "Samsung WB250F": 6.16, + "Samsung WB30F": 6.16, + "Samsung WB350F": 6.16, + "Samsung WB35F": 6.16, + "Samsung WB500": 6.08, + "Samsung WB5000": 6.08, + "Samsung WB50F": 6.16, + "Samsung WB510": 6.08, + "Samsung WB550": 6.08, + "Samsung WB5500": 6.16, + "Samsung WB560": 6.08, + "Samsung WB600": 6.16, + "Samsung WB650": 6.16, + "Samsung WB660": 6.16, + "Samsung WB690": 6.08, + "Samsung WB700": 6.08, + "Samsung WB750": 6.08, + "Samsung WB800F": 6.16, + "Samsung WB850F": 6.16, + "Samsung WP10": 6.08, + "Samsung i100": 6.08, + "Samsung i7": 5.75, + "Samsung i70": 5.75, + "Samsung i8": 5.75, + "Samsung i80": 5.75, + "Samsung i85": 5.75, + "Sanyo DSC S1": 5.33, + "Sanyo DSC S3": 5.33, + "Sanyo DSC S4": 5.75, + "Sanyo DSC S5": 5.75, + "Sanyo VPC A5": 5.75, + "Sanyo VPC AZ1": 7.11, + "Sanyo VPC AZ3 EX": 7.11, + "Sanyo VPC E1090": 6.16, + "Sanyo VPC E1403": 6.16, + "Sanyo VPC E1500TP": 6.08, + "Sanyo VPC E890": 5.75, + "Sanyo VPC HD1 EX": 5.75, + "Sanyo VPC J1 EX": 5.33, + "Sanyo VPC J2 EX": 5.33, + "Sanyo VPC J4 EX": 5.33, + "Sanyo VPC MZ1": 7.11, + "Sanyo VPC MZ2": 7.11, + "Sanyo VPC S1085": 6.08, + "Sanyo VPC S122": 6.16, + "Sanyo VPC S1275": 6.16, + "Sanyo VPC S1414": 6.08, + "Sanyo VPC S885": 5.75, + "Sanyo VPC T1495": 6.08, + "Sanyo VPC X1200": 6.16, + "Sanyo VPC X1220": 6.16, + "Sanyo VPC X1420": 6.16, + "Sanyo Xacti C1": 5.33, + "Sanyo Xacti C4": 5.33, + "Sanyo Xacti C40": 5.33, + "Sanyo Xacti C5": 5.75, + "Sanyo Xacti C6": 5.75, + "Sanyo Xacti DMX-CA65": 5.75, + "Sanyo Xacti DMX-CA8": 5.75, + "Sanyo Xacti DMX-CG65": 5.75, + "Sanyo Xacti DMX-CG9": 6.08, + "Sanyo Xacti DMX-HD700": 5.75, + "Sanyo Xacti DMX-HD800": 5.75, + "Sanyo Xacti E6": 5.75, + "Sanyo Xacti E60": 5.75, + "Sanyo Xacti S50": 5.75, + "Sanyo Xacti S6": 5.75, + "Sanyo Xacti S60": 5.75, + "Sanyo Xacti S70": 5.75, + "Sanyo Xacti VPC S1 EX": 5.33, + "Sanyo Xacti VPC S3 EX": 5.33, + "Sanyo Xacti VPC S4 EX": 5.75, + "Sanyo Xacti VPC-503": 5.75, + "Sanyo Xacti VPC-603": 5.75, + "Sanyo Xacti VPC-CA6": 5.75, + "Sanyo Xacti VPC-CA9": 5.75, + "Sanyo Xacti VPC-CG10": 6.08, + "Sanyo Xacti VPC-CG6": 5.75, + "Sanyo Xacti VPC-E10": 6.16, + "Sanyo Xacti VPC-E1075": 6.08, + "Sanyo Xacti VPC-E7": 5.75, + "Sanyo Xacti VPC-E760": 5.75, + "Sanyo Xacti VPC-E860": 5.75, + "Sanyo Xacti VPC-E870": 5.75, + "Sanyo Xacti VPC-E875": 5.75, + "Sanyo Xacti VPC-HD1A": 5.75, + "Sanyo Xacti VPC-HD2": 5.75, + "Sanyo Xacti VPC-HD2000": 5.75, + "Sanyo Xacti VPC-S1070": 6.08, + "Sanyo Xacti VPC-S500": 5.75, + "Sanyo Xacti VPC-S600": 5.75, + "Sanyo Xacti VPC-S650": 5.75, + "Sanyo Xacti VPC-S670": 5.75, + "Sanyo Xacti VPC-S7": 5.75, + "Sanyo Xacti VPC-S750": 5.75, + "Sanyo Xacti VPC-S760": 5.75, + "Sanyo Xacti VPC-S770": 5.75, + "Sanyo Xacti VPC-S870": 5.75, + "Sanyo Xacti VPC-S880": 5.75, + "Sanyo Xacti VPC-T1060": 6.08, + "Sanyo Xacti VPC-T700": 5.75, + "Sanyo Xacti VPC-T850": 5.75, + "Sanyo Xacti VPC-W800": 5.75, + "Sanyo Xacti VPC-X1200": 6.16, + "Sigma DP1": 20.7, + "Sigma DP1 Merrill": 24.0, + "Sigma DP1s": 20.7, + "Sigma DP1x": 20.7, + "Sigma DP2": 20.7, + "Sigma DP2 Merrill": 24.0, + "Sigma DP2s": 20.7, + "Sigma DP2x": 20.7, + "Sigma DP3 Merrill": 24.0, + "Sigma SD1": 24.0, + "Sigma SD1 Merrill": 24.0, + "Sigma SD10": 20.7, + "Sigma SD14": 20.7, + "Sigma SD15": 20.7, + "Sigma SD9": 20.7, + "Skydio 2": 5.476, + "Sony A77 II": 23.5, + "Sony Alpha 7": 35.8, + "Sony Alpha 7R": 35.9, + "Sony Alpha 7S": 35.6, + "Sony Alpha DSLR-A100": 23.6, + "Sony Alpha DSLR-A200": 23.6, + "Sony Alpha DSLR-A230": 23.5, + "Sony Alpha DSLR-A290": 23.5, + "Sony Alpha 
DSLR-A300": 23.6, + "Sony Alpha DSLR-A330": 23.5, + "Sony Alpha DSLR-A350": 23.6, + "Sony Alpha DSLR-A380": 23.6, + "Sony Alpha DSLR-A390": 23.5, + "Sony Alpha DSLR-A450": 23.4, + "Sony Alpha DSLR-A500": 23.5, + "Sony Alpha DSLR-A550": 23.4, + "Sony Alpha DSLR-A560": 23.5, + "Sony Alpha DSLR-A580": 23.5, + "Sony Alpha DSLR-A700": 23.5, + "Sony Alpha DSLR-A850": 35.9, + "Sony Alpha DSLR-A900": 35.9, + "Sony Alpha NEX-3": 23.4, + "Sony Alpha NEX-3N": 23.5, + "Sony Alpha NEX-5": 23.4, + "Sony Alpha NEX-5N": 23.4, + "Sony Alpha NEX-5R": 23.4, + "Sony Alpha NEX-5T": 23.4, + "Sony Alpha NEX-6": 23.5, + "Sony Alpha NEX-7": 23.5, + "Sony Alpha NEX-C3": 23.4, + "Sony Alpha NEX-F3": 23.4, + "Sony Alpha a3000": 23.5, + "Sony Alpha a5000": 23.5, + "Sony Alpha a5100": 23.5, + "Sony Alpha a6000": 23.5, + "Sony C6903": 6.17, + "Sony Cyber-shot DSC-RX1": 35.8, + "Sony CyberShot DSC HX100V": 6.16, + "Sony CyberShot DSC HX7V": 6.16, + "Sony CyberShot DSC HX9V": 6.16, + "Sony Cybershot DSC D700": 6.4, + "Sony Cybershot DSC D770": 6.4, + "Sony Cybershot DSC F505": 6.4, + "Sony Cybershot DSC F505v": 7.11, + "Sony Cybershot DSC F55": 6.4, + "Sony Cybershot DSC F55v": 7.11, + "Sony Cybershot DSC F707": 8.8, + "Sony Cybershot DSC F717": 8.8, + "Sony Cybershot DSC F77": 7.11, + "Sony Cybershot DSC F828": 8.8, + "Sony Cybershot DSC F88": 5.9, + "Sony Cybershot DSC FX77": 7.11, + "Sony Cybershot DSC G1": 5.75, + "Sony Cybershot DSC G3": 6.16, + "Sony Cybershot DSC H1": 5.75, + "Sony Cybershot DSC H10": 5.75, + "Sony Cybershot DSC H100": 6.16, + "Sony Cybershot DSC H2": 5.75, + "Sony Cybershot DSC H20": 6.16, + "Sony Cybershot DSC H200": 6.16, + "Sony Cybershot DSC H3": 5.75, + "Sony Cybershot DSC H300": 6.16, + "Sony Cybershot DSC H400": 6.16, + "Sony Cybershot DSC H5": 5.75, + "Sony Cybershot DSC H50": 6.16, + "Sony Cybershot DSC H55": 6.16, + "Sony Cybershot DSC H7": 5.75, + "Sony Cybershot DSC H70": 6.16, + "Sony Cybershot DSC H9": 5.75, + "Sony Cybershot DSC H90": 6.16, + "Sony Cybershot DSC HX1": 5.9, + "Sony Cybershot DSC HX10V": 6.16, + "Sony Cybershot DSC HX200V": 6.16, + "Sony Cybershot DSC HX20V": 6.16, + "Sony Cybershot DSC HX30V": 6.16, + "Sony Cybershot DSC HX5": 5.9, + "Sony Cybershot DSC HX50": 6.16, + "Sony Cybershot DSC HX60": 6.16, + "Sony Cybershot DSC J10": 6.16, + "Sony Cybershot DSC L1": 5.33, + "Sony Cybershot DSC M1": 5.9, + "Sony Cybershot DSC M2": 5.75, + "Sony Cybershot DSC N1": 7.11, + "Sony Cybershot DSC N2": 7.53, + "Sony Cybershot DSC P1": 7.11, + "Sony Cybershot DSC P10": 7.11, + "Sony Cybershot DSC P100": 7.11, + "Sony Cybershot DSC P12": 7.11, + "Sony Cybershot DSC P120": 7.11, + "Sony Cybershot DSC P150": 7.11, + "Sony Cybershot DSC P2": 5.33, + "Sony Cybershot DSC P20": 5.33, + "Sony Cybershot DSC P200": 7.11, + "Sony Cybershot DSC P3": 7.11, + "Sony Cybershot DSC P30": 5.33, + "Sony Cybershot DSC P31": 5.33, + "Sony Cybershot DSC P32": 5.33, + "Sony Cybershot DSC P41": 5.33, + "Sony Cybershot DSC P43": 5.33, + "Sony Cybershot DSC P5": 7.11, + "Sony Cybershot DSC P50": 5.33, + "Sony Cybershot DSC P51": 5.33, + "Sony Cybershot DSC P52": 5.33, + "Sony Cybershot DSC P7": 7.11, + "Sony Cybershot DSC P71": 7.11, + "Sony Cybershot DSC P72": 7.11, + "Sony Cybershot DSC P73": 5.33, + "Sony Cybershot DSC P8": 5.33, + "Sony Cybershot DSC P9": 7.11, + "Sony Cybershot DSC P92": 7.11, + "Sony Cybershot DSC P93": 7.11, + "Sony Cybershot DSC R1": 21.5, + "Sony Cybershot DSC RX100": 13.2, + "Sony Cybershot DSC RX100 II": 13.2, + "Sony Cybershot DSC RX1R": 35.8, + "Sony Cybershot DSC S2000": 6.16, 
+ "Sony Cybershot DSC S2100": 6.16, + "Sony Cybershot DSC S30": 5.33, + "Sony Cybershot DSC S3000": 4.96, + "Sony Cybershot DSC S40": 5.33, + "Sony Cybershot DSC S45": 5.75, + "Sony Cybershot DSC S50": 5.33, + "Sony Cybershot DSC S5000": 6.16, + "Sony Cybershot DSC S60": 5.33, + "Sony Cybershot DSC S600": 5.75, + "Sony Cybershot DSC S650": 5.75, + "Sony Cybershot DSC S70": 7.11, + "Sony Cybershot DSC S700": 5.75, + "Sony Cybershot DSC S730": 5.75, + "Sony Cybershot DSC S75": 7.11, + "Sony Cybershot DSC S750": 5.75, + "Sony Cybershot DSC S780": 5.75, + "Sony Cybershot DSC S80": 5.33, + "Sony Cybershot DSC S800": 7.11, + "Sony Cybershot DSC S85": 7.11, + "Sony Cybershot DSC S90": 5.33, + "Sony Cybershot DSC S930": 6.08, + "Sony Cybershot DSC S950": 6.16, + "Sony Cybershot DSC S980": 6.08, + "Sony Cybershot DSC T1": 5.9, + "Sony Cybershot DSC T10": 5.75, + "Sony Cybershot DSC T100": 5.75, + "Sony Cybershot DSC T11": 5.9, + "Sony Cybershot DSC T110": 6.16, + "Sony Cybershot DSC T2": 5.75, + "Sony Cybershot DSC T20": 5.75, + "Sony Cybershot DSC T200": 5.75, + "Sony Cybershot DSC T3": 5.9, + "Sony Cybershot DSC T30": 5.75, + "Sony Cybershot DSC T300": 6.16, + "Sony Cybershot DSC T33": 5.9, + "Sony Cybershot DSC T5": 5.75, + "Sony Cybershot DSC T50": 5.75, + "Sony Cybershot DSC T500": 6.16, + "Sony Cybershot DSC T7": 5.9, + "Sony Cybershot DSC T70": 5.75, + "Sony Cybershot DSC T700": 6.16, + "Sony Cybershot DSC T77": 6.16, + "Sony Cybershot DSC T9": 5.75, + "Sony Cybershot DSC T90": 6.16, + "Sony Cybershot DSC T900": 6.16, + "Sony Cybershot DSC T99": 6.16, + "Sony Cybershot DSC TX1": 5.9, + "Sony Cybershot DSC TX10": 6.16, + "Sony Cybershot DSC TX100V": 6.16, + "Sony Cybershot DSC TX20": 6.16, + "Sony Cybershot DSC TX200V": 6.16, + "Sony Cybershot DSC TX5": 5.9, + "Sony Cybershot DSC TX55": 6.16, + "Sony Cybershot DSC TX66": 6.16, + "Sony Cybershot DSC TX7": 5.9, + "Sony Cybershot DSC TX9": 6.16, + "Sony Cybershot DSC U10": 5.33, + "Sony Cybershot DSC U20": 5.33, + "Sony Cybershot DSC U30": 5.33, + "Sony Cybershot DSC U40": 5.33, + "Sony Cybershot DSC U50": 5.33, + "Sony Cybershot DSC U60": 5.33, + "Sony Cybershot DSC V1": 7.11, + "Sony Cybershot DSC V3": 7.11, + "Sony Cybershot DSC W1": 7.11, + "Sony Cybershot DSC W100": 7.11, + "Sony Cybershot DSC W110": 5.75, + "Sony Cybershot DSC W115": 5.75, + "Sony Cybershot DSC W12": 7.11, + "Sony Cybershot DSC W120": 5.75, + "Sony Cybershot DSC W125": 5.75, + "Sony Cybershot DSC W130": 5.75, + "Sony Cybershot DSC W150": 5.75, + "Sony Cybershot DSC W17": 7.11, + "Sony Cybershot DSC W170": 6.16, + "Sony Cybershot DSC W180": 6.08, + "Sony Cybershot DSC W190": 6.08, + "Sony Cybershot DSC W200": 7.53, + "Sony Cybershot DSC W210": 6.16, + "Sony Cybershot DSC W215": 6.16, + "Sony Cybershot DSC W220": 6.16, + "Sony Cybershot DSC W230": 6.16, + "Sony Cybershot DSC W270": 7.53, + "Sony Cybershot DSC W275": 6.16, + "Sony Cybershot DSC W290": 6.16, + "Sony Cybershot DSC W30": 5.75, + "Sony Cybershot DSC W300": 7.53, + "Sony Cybershot DSC W310": 6.16, + "Sony Cybershot DSC W320": 6.16, + "Sony Cybershot DSC W330": 6.16, + "Sony Cybershot DSC W35": 5.75, + "Sony Cybershot DSC W350": 6.16, + "Sony Cybershot DSC W360": 6.16, + "Sony Cybershot DSC W370": 6.16, + "Sony Cybershot DSC W380": 6.16, + "Sony Cybershot DSC W40": 5.75, + "Sony Cybershot DSC W5": 7.11, + "Sony Cybershot DSC W50": 5.75, + "Sony Cybershot DSC W510": 6.16, + "Sony Cybershot DSC W520": 6.16, + "Sony Cybershot DSC W530": 6.16, + "Sony Cybershot DSC W55": 5.75, + "Sony Cybershot DSC W550": 6.16, + "Sony 
Cybershot DSC W560": 6.16, + "Sony Cybershot DSC W570": 6.16, + "Sony Cybershot DSC W580": 6.16, + "Sony Cybershot DSC W610": 6.16, + "Sony Cybershot DSC W620": 6.16, + "Sony Cybershot DSC W630": 6.16, + "Sony Cybershot DSC W650": 6.16, + "Sony Cybershot DSC W670": 6.16, + "Sony Cybershot DSC W690": 6.16, + "Sony Cybershot DSC W7": 7.11, + "Sony Cybershot DSC W70": 5.75, + "Sony Cybershot DSC W710": 6.16, + "Sony Cybershot DSC W730": 6.16, + "Sony Cybershot DSC W80": 5.75, + "Sony Cybershot DSC W800": 6.16, + "Sony Cybershot DSC W810": 6.16, + "Sony Cybershot DSC W830": 6.16, + "Sony Cybershot DSC W85": 5.75, + "Sony Cybershot DSC W90": 5.75, + "Sony Cybershot DSC WX1": 5.9, + "Sony Cybershot DSC WX10": 6.16, + "Sony Cybershot DSC WX100": 6.16, + "Sony Cybershot DSC WX150": 6.16, + "Sony Cybershot DSC WX220": 6.16, + "Sony Cybershot DSC WX30": 6.16, + "Sony Cybershot DSC WX350": 6.16, + "Sony Cybershot DSC WX5": 6.16, + "Sony Cybershot DSC WX50": 6.16, + "Sony Cybershot DSC WX7": 6.16, + "Sony Cybershot DSC WX70": 6.16, + "Sony Cybershot DSC WX9": 6.16, + "Sony Cybershot DSC-HX300": 6.16, + "Sony Cybershot DSC-HX400": 6.16, + "Sony Cybershot DSC-QX10": 6.16, + "Sony Cybershot DSC-QX100": 13.2, + "Sony Cybershot DSC-RX10": 13.2, + "Sony Cybershot DSC-RX100 III": 13.2, + "Sony Cybershot DSC-TF1": 6.16, + "Sony Cybershot DSC-TX30": 6.16, + "Sony Cybershot DSC-WX200": 6.16, + "Sony Cybershot DSC-WX300": 6.16, + "Sony Cybershot DSC-WX60": 6.16, + "Sony Cybershot DSC-WX80": 6.16, + "Sony D5503": 6.17, + "Sony Mavica CD1000": 5.33, + "Sony Mavica CD200": 5.33, + "Sony Mavica CD250": 5.33, + "Sony Mavica CD300": 7.11, + "Sony Mavica CD350": 5.33, + "Sony Mavica CD400": 7.11, + "Sony Mavica CD500": 7.11, + "Sony Mavica FD-100": 5.33, + "Sony Mavica FD-200": 5.33, + "Sony Mavica FD-71": 6.4, + "Sony Mavica FD-73": 6.4, + "Sony Mavica FD-75": 7.11, + "Sony Mavica FD-81": 4.8, + "Sony Mavica FD-83": 4.8, + "Sony Mavica FD-85": 5.33, + "Sony Mavica FD-87": 5.33, + "Sony Mavica FD-88": 4.8, + "Sony Mavica FD-90": 5.33, + "Sony Mavica FD-91": 4.8, + "Sony Mavica FD-92": 5.33, + "Sony Mavica FD-95": 5.33, + "Sony Mavica FD-97": 5.33, + "Sony QX1": 23.5, + "Sony QX30": 6.16, + "Sony SLT-A33": 23.5, + "Sony SLT-A35": 23.5, + "Sony SLT-A37": 23.5, + "Sony SLT-A55": 23.5, + "Sony SLT-A57": 23.5, + "Sony SLT-A58": 23.2, + "Sony SLT-A65": 23.5, + "Sony SLT-A77": 23.5, + "Sony SLT-A99": 35.8, + "Teracube Teracube One": 4.71, + "Toshiba PDR 2300": 5.33, + "Toshiba PDR 3300": 7.11, + "Toshiba PDR 3310": 7.11, + "Toshiba PDR 3320": 7.11, + "Toshiba PDR 4300": 7.11, + "Toshiba PDR 5300": 7.11, + "Toshiba PDR M25": 5.33, + "Toshiba PDR M5": 6.4, + "Toshiba PDR M500": 5.33, + "Toshiba PDR M60": 6.4, + "Toshiba PDR M61": 6.4, + "Toshiba PDR M65": 6.4, + "Toshiba PDR M70": 7.11, + "Toshiba PDR M700": 5.33, + "Toshiba PDR M71": 7.11, + "Toshiba PDR M81": 7.11, + "Toshiba PDR T10": 5.33, + "Toshiba PDR T20": 5.33, + "Toshiba PDR T30": 5.33, + "Vivitar V8025": 7.11, + "Vivitar ViviCam 5105s": 5.75, + "Vivitar ViviCam 5150s": 5.75, + "Vivitar ViviCam 5160s": 5.75, + "Vivitar ViviCam 5195": 5.75, + "Vivitar ViviCam 5350s": 5.75, + "Vivitar ViviCam 5355": 5.75, + "Vivitar ViviCam 5385": 5.75, + "Vivitar ViviCam 5386": 5.75, + "Vivitar ViviCam 5388": 5.75, + "Vivitar ViviCam 6150s": 5.75, + "Vivitar ViviCam 6200w": 5.75, + "Vivitar ViviCam 6300": 5.75, + "Vivitar ViviCam 6320": 5.75, + "Vivitar ViviCam 6326": 5.75, + "Vivitar ViviCam 6330": 5.75, + "Vivitar ViviCam 6380u": 5.75, + "Vivitar ViviCam 6385u": 5.75, + "Vivitar 
ViviCam 6388s": 5.75, + "Vivitar ViviCam 7100s": 5.75, + "Vivitar ViviCam 7310": 5.75, + "Vivitar ViviCam 7388s": 5.75, + "Vivitar ViviCam 7500i": 5.75, + "Vivitar ViviCam 8300s": 7.11, + "Vivitar ViviCam 8400": 7.11, + "Vivitar ViviCam 8600": 7.11, + "Vivitar ViviCam 8600s": 7.11, + "Vivitar ViviCam 8625": 7.11, + "Vivitar ViviCam X30": 7.11, + "Vivitar ViviCam X60": 7.11, + "Yakumo CamMaster SD 432": 5.33, + "Yakumo CamMaster SD 482": 5.75, + "Yakumo Mega Image 34": 5.33, + "Yakumo Mega Image 35": 7.11, + "Yakumo Mega Image 37": 5.75, + "Yakumo Mega Image 410": 5.75, + "Yakumo Mega Image 45": 7.11, + "Yakumo Mega Image 47": 5.75, + "Yakumo Mega Image 47 SL": 5.75, + "Yakumo Mega Image 47sx": 7.11, + "Yakumo Mega Image 55cx": 7.11, + "Yakumo Mega Image 57": 7.11, + "Yakumo Mega Image 57x": 7.11, + "Yakumo Mega Image 610x": 7.11, + "Yakumo Mega Image 67x": 7.11, + "Yakumo Mega Image 811x": 7.11, + "Yakumo Mega Image 84 D": 5.75, + "Yakumo Mega Image 85D": 5.75, + "Yakumo Mega Image II": 7.11, + "Yakumo Mega Image IV": 7.11, + "Yakumo Mega Image VI": 7.11, + "Yakumo Mega Image VII": 6.4, + "Yakumo Mega Image X": 7.11, + "Yakumo Mega Image XL": 4.23, + "Yakumo Mega Image XS": 6.4, + "YTXJ02FM": 6.17, + "Yuneec E90": 13.2, + "Google Nexus S": 3.9, + "HUAWEI HUAWEI P6-U06": 4.8, + "oneplu A000": 4.8, + "SAMSUNG GT-I9300": 4.53, + "SAMSUNG GT-I9195": 4.54, + "LG Electronics LG-D390n": 4.8, + "Sony Xperia Z1": 6.17, + "Apple iPhone7,2": 6.1, + "LG Electronics LG-D855": 4.8, + "Canon EOS DIGITAL REBEL XT": 22.2, + "Canon EOS DIGITAL REBEL XTi": 22.2, + "Canon EOS Kiss Digital": 22.66, + "Canon IXY DIGITAL 600": 7.176, + "DJI PHANTOM VISION FC200": 6.17, + "GoPro HD2 U": 5.8, + "GoPro HERO4 Black": 6.17, + "GoPro HERO4 Silver": 6.17, + "Gopro HD3": 5.76, + "KONICA MILOLTA DYNAX 5D": 23.5, + "Mantis i23": 45.0, + "Mobius Action Cam": 2.1, + "NIKON COOLPIX L3": 5.76, + "NIKON E2500": 5.27, + "NIKON E3100": 5.27, + "NIKON E3200": 5.27, + "NIKON E3700": 5.27, + "NIKON E4200": 7.176, + "NIKON E4300": 7.18, + "NIKON E4500": 7.176, + "NIKON E4600": 5.76, + "NIKON E5000": 8.8, + "NIKON E5200": 7.176, + "NIKON E5400": 7.176, + "NIKON E5600": 5.76, + "NIKON E5700": 8.8, + "NIKON E5900": 7.176, + "NIKON E7600": 7.176, + "NIKON E775": 5.27, + "NIKON E7900": 7.176, + "NIKON E8800": 8.8, + "NIKON E990": 7.176, + "NIKON E995": 7.176, + "Nokia N80": 5.27, + "Nokia N93": 4.536, + "Nokia N95": 5.7, + "OLYMPUS_IMAGING_CORP. X450,D535Z,C370Z": 5.27, + "PHANTOM VISION FC200": 6.17, + "SAMSUNG TECHWIN Pro 815": 8.8, + "SONY DSC-HX5V": 6.104, + "SONY DSC-N12": 7.176, + "SONY ILCE-5100": 23.5, + "SONY ILCE-7S": 35.8, + "SONY SLT-A77V": 23.5, + "GITUP GIT2": 6.216, + "DJI ZH20T": 7.68, + "DJI XT2": 10.88, + "DJI ZENMUSEP1": 35.9 +} diff --git a/utils/tools.py b/utils/tools.py new file mode 100644 index 0000000000000000000000000000000000000000..adbc3eae97c599d9ce606426ae5628484b9e2499 --- /dev/null +++ b/utils/tools.py @@ -0,0 +1,17 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. 
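For orientation, the sensor-width table that closes just above is the kind of lookup commonly used to turn an EXIF focal length in millimetres into a focal length in pixels via the pinhole relation f_px = f_mm * image_width_px / sensor_width_mm. The sketch below only illustrates that conversion; the file name, the dictionary SENSOR_WIDTHS_MM, and the helper focal_length_px are hypothetical and not part of this diff.

# Minimal sketch (assumed names, not part of this repository): use a
# sensor-width table like the one above to derive focal length in pixels.
import json

with open("sensor_widths.json") as fh:  # hypothetical path to the JSON table above
    SENSOR_WIDTHS_MM = json.load(fh)    # {"<Make Model>": sensor width in mm}

def focal_length_px(make_model: str, focal_mm: float, image_width_px: int) -> float:
    """Pinhole relation: f_px = f_mm * image_width_px / sensor_width_mm."""
    return focal_mm * image_width_px / SENSOR_WIDTHS_MM[make_model]

# Example: a 6.16 mm sensor, 4.3 mm lens, 4000 px wide image -> ~2792 px.
print(focal_length_px("Sony Cybershot DSC WX100", 4.3, 4000))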
+ +import time + + +class Timer: + def __init__(self, name=None): + self.name = name + + def __enter__(self): + self.tstart = time.time() + return self + + def __exit__(self, type, value, traceback): + self.duration = time.time() - self.tstart + if self.name is not None: + print("[%s] Elapsed: %s" % (self.name, self.duration)) diff --git a/utils/viz_2d.py b/utils/viz_2d.py new file mode 100644 index 0000000000000000000000000000000000000000..09123e81160196f905580e0432847fa97247e468 --- /dev/null +++ b/utils/viz_2d.py @@ -0,0 +1,195 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. + +# Adapted from Hierarchical-Localization, Paul-Edouard Sarlin, ETH Zurich +# https://github.com/cvg/Hierarchical-Localization/blob/master/hloc/utils/viz.py +# Released under the Apache License 2.0 + +import matplotlib +import matplotlib.patheffects as path_effects +import matplotlib.pyplot as plt +import numpy as np + + +def plot_images(imgs, titles=None, cmaps="gray", dpi=100, pad=0.5, adaptive=True): + """Plot a set of images horizontally. + Args: + imgs: a list of NumPy or PyTorch images, RGB (H, W, 3) or mono (H, W). + titles: a list of strings, as titles for each image. + cmaps: colormaps for monochrome images. + adaptive: whether the figure size should fit the image aspect ratios. + """ + n = len(imgs) + if not isinstance(cmaps, (list, tuple)): + cmaps = [cmaps] * n + + if adaptive: + ratios = [i.shape[1] / i.shape[0] for i in imgs] # W / H + else: + ratios = [4 / 3] * n + figsize = [sum(ratios) * 4.5, 4.5] + fig, ax = plt.subplots( + 1, n, figsize=figsize, dpi=dpi, gridspec_kw={"width_ratios": ratios} + ) + if n == 1: + ax = [ax] + for i in range(n): + ax[i].imshow(imgs[i], cmap=plt.get_cmap(cmaps[i])) + ax[i].get_yaxis().set_ticks([]) + ax[i].get_xaxis().set_ticks([]) + ax[i].set_axis_off() + for spine in ax[i].spines.values(): # remove frame + spine.set_visible(False) + if titles: + ax[i].set_title(titles[i]) + fig.tight_layout(pad=pad) + # fig.show() + + +def plot_keypoints(kpts, colors="lime", ps=4): + """Plot keypoints for existing images. + Args: + kpts: list of ndarrays of size (N, 2). + colors: string, or list of list of tuples (one for each keypoints). + ps: size of the keypoints as float. + """ + if not isinstance(colors, list): + colors = [colors] * len(kpts) + axes = plt.gcf().axes + for a, k, c in zip(axes, kpts, colors): + a.scatter(k[:, 0], k[:, 1], c=c, s=ps, linewidths=0) + + +def plot_matches(kpts0, kpts1, color=None, lw=1.5, ps=4, indices=(0, 1), a=1.0): + """Plot matches for a pair of existing images. + Args: + kpts0, kpts1: corresponding keypoints of size (N, 2). + color: color of each match, string or RGB tuple. Random if not given. + lw: width of the lines. + ps: size of the end points (no endpoint if ps=0) + indices: indices of the images to draw the matches on. + a: alpha opacity of the match lines. 
+ """ + fig = plt.gcf() + ax = fig.axes + assert len(ax) > max(indices) + ax0, ax1 = ax[indices[0]], ax[indices[1]] + fig.canvas.draw() + + assert len(kpts0) == len(kpts1) + if color is None: + color = matplotlib.cm.hsv(np.random.rand(len(kpts0))).tolist() + elif len(color) > 0 and not isinstance(color[0], (tuple, list)): + color = [color] * len(kpts0) + + if lw > 0: + # transform the points into the figure coordinate system + transFigure = fig.transFigure.inverted() + fkpts0 = transFigure.transform(ax0.transData.transform(kpts0)) + fkpts1 = transFigure.transform(ax1.transData.transform(kpts1)) + fig.lines += [ + matplotlib.lines.Line2D( + (fkpts0[i, 0], fkpts1[i, 0]), + (fkpts0[i, 1], fkpts1[i, 1]), + zorder=1, + transform=fig.transFigure, + c=color[i], + linewidth=lw, + alpha=a, + ) + for i in range(len(kpts0)) + ] + + # freeze the axes to prevent the transform to change + ax0.autoscale(enable=False) + ax1.autoscale(enable=False) + + if ps > 0: + ax0.scatter(kpts0[:, 0], kpts0[:, 1], c=color, s=ps) + ax1.scatter(kpts1[:, 0], kpts1[:, 1], c=color, s=ps) + + +def add_text( + idx, + text, + pos=(0.01, 0.99), + fs=15, + color="w", + lcolor="k", + lwidth=2, + ha="left", + va="top", + normalized=True, + zorder=3, +): + ax = plt.gcf().axes[idx] + tfm = ax.transAxes if normalized else ax.transData + t = ax.text( + *pos, + text, + fontsize=fs, + ha=ha, + va=va, + color=color, + transform=tfm, + clip_on=True, + zorder=zorder, + ) + if lcolor is not None: + t.set_path_effects( + [ + path_effects.Stroke(linewidth=lwidth, foreground=lcolor), + path_effects.Normal(), + ] + ) + + +def save_plot(path, **kw): + """Save the current figure without any white margin.""" + plt.savefig(path, bbox_inches="tight", pad_inches=0, **kw) + + +def features_to_RGB(*Fs, masks=None, skip=1): + """Project a list of d-dimensional feature maps to RGB colors using PCA.""" + from sklearn.decomposition import PCA + + def normalize(x): + return x / np.linalg.norm(x, axis=-1, keepdims=True) + + if masks is not None: + assert len(Fs) == len(masks) + + flatten = [] + for i, F in enumerate(Fs): + c, h, w = F.shape + F = np.rollaxis(F, 0, 3) + F_flat = F.reshape(-1, c) + if masks is not None and masks[i] is not None: + mask = masks[i] + assert mask.shape == F.shape[:2] + F_flat = F_flat[mask.reshape(-1)] + flatten.append(F_flat) + flatten = np.concatenate(flatten, axis=0) + flatten = normalize(flatten) + + pca = PCA(n_components=3) + if skip > 1: + pca.fit(flatten[::skip]) + flatten = pca.transform(flatten) + else: + flatten = pca.fit_transform(flatten) + flatten = (normalize(flatten) + 1) / 2 + + Fs_rgb = [] + for i, F in enumerate(Fs): + h, w = F.shape[-2:] + if masks is None or masks[i] is None: + F_rgb, flatten = np.split(flatten, [h * w], axis=0) + F_rgb = F_rgb.reshape((h, w, 3)) + else: + F_rgb = np.zeros((h, w, 3)) + indices = np.where(masks[i]) + F_rgb[indices], flatten = np.split(flatten, [len(indices[0])], axis=0) + F_rgb = np.concatenate([F_rgb, masks[i][..., None]], axis=-1) + Fs_rgb.append(F_rgb) + assert flatten.shape[0] == 0, flatten.shape + return Fs_rgb diff --git a/utils/viz_localization.py b/utils/viz_localization.py new file mode 100644 index 0000000000000000000000000000000000000000..3e15da2329ae2956c4eb663dfcd3a0cf841f2b84 --- /dev/null +++ b/utils/viz_localization.py @@ -0,0 +1,160 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. 
+ +import copy + +import matplotlib as mpl +import matplotlib.pyplot as plt +import numpy as np +import torch + + +def likelihood_overlay( + prob, map_viz=None, p_rgb=0.2, p_alpha=1 / 15, thresh=None, cmap="jet" +): + prob = prob / prob.max() + cmap = plt.get_cmap(cmap) + rgb = cmap(prob**p_rgb) + alpha = prob[..., None] ** p_alpha + if thresh is not None: + alpha[prob <= thresh] = 0 + if map_viz is not None: + faded = map_viz + (1 - map_viz) * 0.5 + rgb = rgb[..., :3] * alpha + faded * (1 - alpha) + rgb = np.clip(rgb, 0, 1) + else: + rgb[..., -1] = alpha.squeeze(-1) + return rgb + + +def heatmap2rgb(scores, mask=None, clip_min=0.05, alpha=0.8, cmap="jet"): + min_, max_ = np.quantile(scores, [clip_min, 1]) + scores = scores.clip(min=min_) + rgb = plt.get_cmap(cmap)((scores - min_) / (max_ - min_)) + if mask is not None: + if alpha == 0: + rgb[mask] = np.nan + else: + rgb[..., -1] = 1 - (1 - 1.0 * mask) * (1 - alpha) + return rgb + + +def plot_pose(axs, xy, yaw=None, s=1 / 35, c="r", a=1, w=0.015, dot=True, zorder=10): + if yaw is not None: + yaw = np.deg2rad(yaw) + uv = np.array([np.sin(yaw), -np.cos(yaw)]) + xy = np.array(xy) + 0.5 + if not isinstance(axs, list): + axs = [axs] + for ax in axs: + if isinstance(ax, int): + ax = plt.gcf().axes[ax] + if dot: + ax.scatter(*xy, c=c, s=70, zorder=zorder, linewidths=0, alpha=a) + if yaw is not None: + ax.quiver( + *xy, + *uv, + scale=s, + scale_units="xy", + angles="xy", + color=c, + zorder=zorder, + alpha=a, + width=w, + ) + + +def plot_dense_rotations( + ax, prob, thresh=0.01, skip=10, s=1 / 15, k=3, c="k", w=None, **kwargs +): + t = torch.argmax(prob, -1) + yaws = t.numpy() / prob.shape[-1] * 360 + prob = prob.max(-1).values / prob.max() + mask = prob > thresh + masked = prob.masked_fill(~mask, 0) + max_ = torch.nn.functional.max_pool2d( + masked.float()[None, None], k, stride=1, padding=k // 2 + ) + mask = (max_[0, 0] == masked.float()) & mask + indices = np.where(mask.numpy() > 0) + plot_pose( + ax, + indices[::-1], + yaws[indices], + s=s, + c=c, + dot=False, + zorder=0.1, + w=w, + **kwargs, + ) + + +def copy_image(im, ax): + prop = im.properties() + prop.pop("children") + prop.pop("size") + prop.pop("tightbbox") + prop.pop("transformed_clip_path_and_affine") + prop.pop("window_extent") + prop.pop("figure") + prop.pop("transform") + return ax.imshow(im.get_array(), **prop) + + +def add_circle_inset( + ax, + center, + corner=None, + radius_px=10, + inset_size=0.4, + inset_offset=0.005, + color="red", +): + data_t_axes = ax.transAxes + ax.transData.inverted() + if corner is None: + center_axes = np.array(data_t_axes.inverted().transform(center)) + corner = 1 - np.round(center_axes).astype(int) + corner = np.array(corner) + bottom_left = corner * (1 - inset_size - inset_offset) + (1 - corner) * inset_offset + axins = ax.inset_axes([*bottom_left, inset_size, inset_size]) + if ax.yaxis_inverted(): + axins.invert_yaxis() + axins.set_axis_off() + + c = mpl.patches.Circle(center, radius_px, fill=False, color=color) + c1 = mpl.patches.Circle(center, radius_px, fill=False, color=color) + # ax.add_patch(c) + ax.add_patch(c1) + # ax.add_patch(c.frozen()) + axins.add_patch(c) + + radius_inset = radius_px + 1 + axins.set_xlim([center[0] - radius_inset, center[0] + radius_inset]) + ylim = center[1] - radius_inset, center[1] + radius_inset + if axins.yaxis_inverted(): + ylim = ylim[::-1] + axins.set_ylim(ylim) + + for im in ax.images: + im2 = copy_image(im, axins) + im2.set_clip_path(c) + return axins + + +def plot_bev(bev, uv, yaw, ax=None, zorder=10, 
**kwargs): + if ax is None: + ax = plt.gca() + h, w = bev.shape[:2] + tfm = mpl.transforms.Affine2D().translate(-w / 2, -h) + tfm = tfm.rotate_deg(yaw).translate(*uv + 0.5) + tfm += plt.gca().transData + ax.imshow(bev, transform=tfm, zorder=zorder, **kwargs) + ax.plot( + [0, w - 1, w / 2, 0], + [0, 0, h - 0.5, 0], + transform=tfm, + c="k", + lw=1, + zorder=zorder + 1, + ) diff --git a/utils/wrappers.py b/utils/wrappers.py new file mode 100644 index 0000000000000000000000000000000000000000..4d8b35a35fdc83569e943735e7433ed72da1343e --- /dev/null +++ b/utils/wrappers.py @@ -0,0 +1,342 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. + +# Adapted from PixLoc, Paul-Edouard Sarlin, ETH Zurich +# https://github.com/cvg/pixloc +# Released under the Apache License 2.0 + +""" +Convenience classes for an SE3 pose and a pinhole Camera with lens distortion. +Based on PyTorch tensors: differentiable, batched, with GPU support. +""" + +import functools +import inspect +import math +from typing import Dict, List, NamedTuple, Tuple, Union + +import numpy as np +import torch + +from .geometry import undistort_points + + +def autocast(func): + """Cast the inputs of a TensorWrapper method to PyTorch tensors + if they are numpy arrays. Use the device and dtype of the wrapper. + """ + + @functools.wraps(func) + def wrap(self, *args): + device = torch.device("cpu") + dtype = None + if isinstance(self, TensorWrapper): + if self._data is not None: + device = self.device + dtype = self.dtype + elif not inspect.isclass(self) or not issubclass(self, TensorWrapper): + raise ValueError(self) + + cast_args = [] + for arg in args: + if isinstance(arg, np.ndarray): + arg = torch.from_numpy(arg) + arg = arg.to(device=device, dtype=dtype) + cast_args.append(arg) + return func(self, *cast_args) + + return wrap + + +class TensorWrapper: + _data = None + + @autocast + def __init__(self, data: torch.Tensor): + self._data = data + + @property + def shape(self): + return self._data.shape[:-1] + + @property + def device(self): + return self._data.device + + @property + def dtype(self): + return self._data.dtype + + def __getitem__(self, index): + return self.__class__(self._data[index]) + + def __setitem__(self, index, item): + self._data[index] = item.data + + def to(self, *args, **kwargs): + return self.__class__(self._data.to(*args, **kwargs)) + + def cpu(self): + return self.__class__(self._data.cpu()) + + def cuda(self): + return self.__class__(self._data.cuda()) + + def pin_memory(self): + return self.__class__(self._data.pin_memory()) + + def float(self): + return self.__class__(self._data.float()) + + def double(self): + return self.__class__(self._data.double()) + + def detach(self): + return self.__class__(self._data.detach()) + + @classmethod + def stack(cls, objects: List, dim=0, *, out=None): + data = torch.stack([obj._data for obj in objects], dim=dim, out=out) + return cls(data) + + @classmethod + def __torch_function__(cls, func, types, args=(), kwargs=None): + if kwargs is None: + kwargs = {} + if func is torch.stack: + return cls.stack(*args, **kwargs) + else: + return NotImplemented + + +class Pose(TensorWrapper): + def __init__(self, data: torch.Tensor): + assert data.shape[-1] == 12 + super().__init__(data) + + @classmethod + @autocast + def from_Rt(cls, R: torch.Tensor, t: torch.Tensor): + """Pose from a rotation matrix and translation vector. + Accepts numpy arrays or PyTorch tensors. + + Args: + R: rotation matrix with shape (..., 3, 3). + t: translation vector with shape (..., 3). 
+ """ + assert R.shape[-2:] == (3, 3) + assert t.shape[-1] == 3 + assert R.shape[:-2] == t.shape[:-1] + data = torch.cat([R.flatten(start_dim=-2), t], -1) + return cls(data) + + @classmethod + def from_4x4mat(cls, T: torch.Tensor): + """Pose from an SE(3) transformation matrix. + Args: + T: transformation matrix with shape (..., 4, 4). + """ + assert T.shape[-2:] == (4, 4) + R, t = T[..., :3, :3], T[..., :3, 3] + return cls.from_Rt(R, t) + + @classmethod + def from_colmap(cls, image: NamedTuple): + """Pose from a COLMAP Image.""" + return cls.from_Rt(image.qvec2rotmat(), image.tvec) + + @property + def R(self) -> torch.Tensor: + """Underlying rotation matrix with shape (..., 3, 3).""" + rvec = self._data[..., :9] + return rvec.reshape(rvec.shape[:-1] + (3, 3)) + + @property + def t(self) -> torch.Tensor: + """Underlying translation vector with shape (..., 3).""" + return self._data[..., -3:] + + def inv(self) -> "Pose": + """Invert an SE(3) pose.""" + R = self.R.transpose(-1, -2) + t = -(R @ self.t.unsqueeze(-1)).squeeze(-1) + return self.__class__.from_Rt(R, t) + + def compose(self, other: "Pose") -> "Pose": + """Chain two SE(3) poses: T_B2C.compose(T_A2B) -> T_A2C.""" + R = self.R @ other.R + t = self.t + (self.R @ other.t.unsqueeze(-1)).squeeze(-1) + return self.__class__.from_Rt(R, t) + + @autocast + def transform(self, p3d: torch.Tensor) -> torch.Tensor: + """Transform a set of 3D points. + Args: + p3d: 3D points, numpy array or PyTorch tensor with shape (..., 3). + """ + assert p3d.shape[-1] == 3 + # assert p3d.shape[:-2] == self.shape # allow broadcasting + return p3d @ self.R.transpose(-1, -2) + self.t.unsqueeze(-2) + + def __matmul__( + self, other: Union["Pose", torch.Tensor] + ) -> Union["Pose", torch.Tensor]: + """Transform a set of 3D points: T_A2B * p3D_A -> p3D_B. + or chain two SE(3) poses: T_B2C @ T_A2B -> T_A2C.""" + if isinstance(other, self.__class__): + return self.compose(other) + else: + return self.transform(other) + + def numpy(self) -> Tuple[np.ndarray]: + return self.R.numpy(), self.t.numpy() + + def magnitude(self) -> Tuple[torch.Tensor]: + """Magnitude of the SE(3) transformation. + Returns: + dr: rotation anngle in degrees. + dt: translation distance in meters. + """ + trace = torch.diagonal(self.R, dim1=-1, dim2=-2).sum(-1) + cos = torch.clamp((trace - 1) / 2, -1, 1) + dr = torch.acos(cos).abs() / math.pi * 180 + dt = torch.norm(self.t, dim=-1) + return dr, dt + + def __repr__(self): + return f"Pose: {self.shape} {self.dtype} {self.device}" + + +class Camera(TensorWrapper): + eps = 1e-4 + + def __init__(self, data: torch.Tensor): + assert data.shape[-1] in {6, 8, 10} + super().__init__(data) + + @classmethod + def from_dict(cls, camera: Union[Dict, NamedTuple]): + """Camera from a COLMAP Camera tuple or dictionary. + We assume that the origin (0, 0) is the center of the top-left pixel. + This is different from COLMAP. 
+ """ + if isinstance(camera, tuple): + camera = camera._asdict() + + model = camera["model"] + params = camera["params"] + + if model in ["OPENCV", "PINHOLE"]: + (fx, fy, cx, cy), params = np.split(params, [4]) + elif model in ["SIMPLE_PINHOLE", "SIMPLE_RADIAL", "RADIAL"]: + (f, cx, cy), params = np.split(params, [3]) + fx = fy = f + if model == "SIMPLE_RADIAL": + params = np.r_[params, 0.0] + else: + raise NotImplementedError(model) + + data = np.r_[ + camera["width"], camera["height"], fx, fy, cx - 0.5, cy - 0.5, params + ] + return cls(data) + + @property + def size(self) -> torch.Tensor: + """Size (width height) of the images, with shape (..., 2).""" + return self._data[..., :2] + + @property + def f(self) -> torch.Tensor: + """Focal lengths (fx, fy) with shape (..., 2).""" + return self._data[..., 2:4] + + @property + def c(self) -> torch.Tensor: + """Principal points (cx, cy) with shape (..., 2).""" + return self._data[..., 4:6] + + @property + def dist(self) -> torch.Tensor: + """Distortion parameters, with shape (..., {0, 2, 4}).""" + return self._data[..., 6:] + + def scale(self, scales: Union[float, int, Tuple[Union[float, int]]]): + """Update the camera parameters after resizing an image.""" + if isinstance(scales, (int, float)): + scales = (scales, scales) + s = self._data.new_tensor(scales) + data = torch.cat( + [self.size * s, self.f * s, (self.c + 0.5) * s - 0.5, self.dist], -1 + ) + return self.__class__(data) + + def crop(self, left_top: Tuple[float], size: Tuple[int]): + """Update the camera parameters after cropping an image.""" + left_top = self._data.new_tensor(left_top) + size = self._data.new_tensor(size) + data = torch.cat([size, self.f, self.c - left_top, self.dist], -1) + return self.__class__(data) + + @autocast + def in_image(self, p2d: torch.Tensor): + """Check if 2D points are within the image boundaries.""" + assert p2d.shape[-1] == 2 + # assert p2d.shape[:-2] == self.shape # allow broadcasting + size = self.size.unsqueeze(-2) + valid = torch.all((p2d >= 0) & (p2d <= (size - 1)), -1) + return valid + + @autocast + def project(self, p3d: torch.Tensor) -> Tuple[torch.Tensor]: + """Project 3D points into the camera plane and check for visibility.""" + z = p3d[..., -1] + valid = z > self.eps + z = z.clamp(min=self.eps) + p2d = p3d[..., :-1] / z.unsqueeze(-1) + return p2d, valid + + def J_project(self, p3d: torch.Tensor): + x, y, z = p3d[..., 0], p3d[..., 1], p3d[..., 2] + zero = torch.zeros_like(z) + J = torch.stack([1 / z, zero, -x / z**2, zero, 1 / z, -y / z**2], dim=-1) + J = J.reshape(p3d.shape[:-1] + (2, 3)) + return J # N x 2 x 3 + + @autocast + def undistort(self, pts: torch.Tensor) -> Tuple[torch.Tensor]: + """Undistort normalized 2D coordinates + and check for validity of the distortion model. 
+ """ + assert pts.shape[-1] == 2 + # assert pts.shape[:-2] == self.shape # allow broadcasting + return undistort_points(pts, self.dist) + + @autocast + def denormalize(self, p2d: torch.Tensor) -> torch.Tensor: + """Convert normalized 2D coordinates into pixel coordinates.""" + return p2d * self.f.unsqueeze(-2) + self.c.unsqueeze(-2) + + @autocast + def normalize(self, p2d: torch.Tensor) -> torch.Tensor: + """Convert pixel coordinates into normalized 2D coordinates.""" + return (p2d - self.c.unsqueeze(-2)) / self.f.unsqueeze(-2) + + def J_denormalize(self): + return torch.diag_embed(self.f).unsqueeze(-3) # 1 x 2 x 2 + + @autocast + def world2image(self, p3d: torch.Tensor) -> Tuple[torch.Tensor]: + """Transform 3D points into 2D pixel coordinates.""" + p2d, visible = self.project(p3d) + p2d, mask = self.undistort(p2d) + p2d = self.denormalize(p2d) + valid = visible & mask & self.in_image(p2d) + return p2d, valid + + def J_world2image(self, p3d: torch.Tensor): + p2d_dist, valid = self.project(p3d) + J = self.J_denormalize() @ self.J_undistort(p2d_dist) @ self.J_project(p3d) + return J, valid + + def __repr__(self): + return f"Camera {self.shape} {self.dtype} {self.device}"