{ "experiment": { "tokenizer_checkpoint": "tatitok_bl32_vae.bin", "generator_checkpoint": "maskgen_kl_l.bin" }, "model": { "vq_model": { "quantize_mode": "vae", "token_size": 16, "vit_enc_model_size": "base", "vit_dec_model_size": "large", "vit_enc_patch_size": 16, "vit_dec_patch_size": 16, "num_latent_tokens": 32, "scale_factor": 0.7525, "finetune_decoder": false, "is_legacy": false }, "maskgen": { "decoder_embed_dim": 1024, "decoder_depth": 16, "decoder_num_heads": 16, "micro_condition": true, "micro_condition_embed_dim": 256, "text_drop_prob": 0.1, "cfg": 3.0, "cfg_schedule": "linear", "num_iter": 32, "temperature": 1.0, "sample_aesthetic_score": 6.5 } }, "losses": { "diffloss_d": 8, "diffloss_w": 1024 }, "dataset": { "preprocessing": { "crop_size": 256 } } }