dek924
/

ehrxdiff

+{
+  "model_type": "ehrxdiff",
+  "linear_start": 0.0015,
+  "linear_end": 0.0295,
+  "num_timesteps_cond": 1,
+  "log_every_t": 200,
+  "timesteps": 1000,
+  "first_stage_key": "target_img",
+  "cond_stage_key": "table, prev_img",
+  "image_size": 64,
+  "channels": 3,
+  "cond_stage_trainable": true,
+  "conditioning_key": "crossattn",
+  "monitor": "val/loss_simple_ema",
+  "scale_factor": 0.18215,
+  "use_ema": true,
+  "load_ema": false,
+  "unet_config": {
+    "target": "cheff.ldm.modules.diffusionmodules.openaimodel.UNetModel",
+    "params": {
+      "image_size": 64,
+      "in_channels": 3,
+      "out_channels": 3,
+      "model_channels": 224,
+      "attention_resolutions": [
+        8,
+        4,
+        2
+      ],
+      "num_res_blocks": 2,
+      "channel_mult": [
+        1,
+        2,
+        4,
+        4
+      ],
+      "num_heads": 8,
+      "use_spatial_transformer": true,
+      "transformer_depth": 1,
+      "context_dim": 768,
+      "use_checkpoint": true,
+      "legacy": false
+    }
+  },
+  "first_stage_config": {
+    "target": "cheff.ldm.models.autoencoder.AutoencoderKL",
+    "params": {
+      "embed_dim": 3,
+      "ckpt_path": null,
+      "ddconfig": {
+        "double_z": true,
+        "z_channels": 3,
+        "resolution": 256,
+        "in_channels": 3,
+        "out_ch": 3,
+        "ch": 128,
+        "ch_mult": [
+          1,
+          2,
+          4
+        ],
+        "num_res_blocks": 2,
+        "attn_resolutions": [],
+        "dropout": 0.0
+      },
+      "lossconfig": {
+        "target": "torch.nn.Identity"
+      }
+    }
+  },
+  "cond_stage_config": {
+    "target": "cheff.ldm.modules.encoders.modules.MultiModalTransformerAdaptor",
+    "params": {
+      "autoencoder_config": {
+        "embed_dim": 3,
+        "ckpt_path": null,
+        "ddconfig": {
+          "double_z": true,
+          "z_channels": 3,
+          "resolution": 256,
+          "in_channels": 3,
+          "out_ch": 3,
+          "ch": 128,
+          "ch_mult": [
+            1,
+            2,
+            4
+          ],
+          "num_res_blocks": 2,
+          "attn_resolutions": [],
+          "dropout": 0.0
+        },
+        "lossconfig": {
+          "target": "torch.nn.Identity"
+        }
+      },
+      "clip_visual_enc_config": {
+        "input_resolution": 256,
+        "layers": 12,
+        "width": 768,
+        "patch_size": 32,
+        "heads": 12
+      },
+      "clip_enc_checkpoint": "checkpoints/clip_vit32_256_1024.ckpt",
+      "context_dim": 768,
+      "condition_feat_dim": 1024,
+      "clip_trainable": true
+    }
+  }
+}