linear_start: 0.00085 linear_end: 0.0120 num_timesteps_cond: 1 log_every_t: 200 timesteps: 1000 first_stage_key: "jpg" cond_stage_key: "txt" image_size: 64 channels: 4 cond_stage_trainable: false conditioning_key: crossattn monitor: val/loss_simple_ema scale_factor: 0.18215 use_ema: False # we set this to false because this is an inference only config