---
# Flat key/value configuration; the keys reference a VQ tokenizer
# (`vq_model`, `config: configs/tokenizer.yaml`) and an ImageNet dataset.
# NOTE(review): the section headings below group keys by name only; the exact
# meaning of each option is defined by the consuming script, which is not
# visible here -- confirm against that code before relying on a heading.

# Dataset / output locations
data_path: /mnt/localssd/ImageNet2012/train
data_face_path: null
cloud_save_path: output/exp-vq
no_local_save: false

# Model selection and checkpointing
vq_model: VQ-16
vq_ckpt: null
finetune: false
ema: true

# Codebook / quantizer options
codebook_size: 4096
codebook_embed_dim: 32
codebook_l2_norm: true
codebook_weight: 1.0
entropy_loss_ratio: 0.1
commit_loss_beta: 0.25

# Reconstruction and perceptual loss options
reconstruction_weight: 1.0
reconstruction_loss: l2
perceptual_weight: 1.0

# Discriminator / generator loss options
disc_weight: 0.5
disc_epoch_start: 56
disc_start: 0
disc_type: dinodisc
disc_loss: hinge
gen_loss: hinge

# Miscellaneous run options
compile: false
dropout_p: 0.0
results_dir: results_tokenizer_image
dataset: imagenet
image_size: 256

# Optimization options
epochs: 200
# Written with an explicit decimal point in the mantissa: YAML 1.1 loaders
# (e.g. PyYAML) resolve a bare `3e-5` as a string rather than a float.
lr: 3.0e-5
disc_lr: 0.0001
max_grad_norm: 0.0
lr_scheduler: cosine
weight_decay: 0.0
disc_weight_decay: 0.0005
beta1: 0.9
beta2: 0.95
global_batch_size: 1024
global_seed: 0
num_workers: 16

# Logging / checkpoint cadence and precision
log_every: 100
vis_every: 5000
ckpt_every: 10000
gradient_accumulation_steps: 1
mixed_precision: bf16
save_best: true

# Validation data and output folders
val_data_path: /mnt/localssd/ImageNet2012/val
sample_folder_dir: samples
reconstruction_folder_dir: reconstruction

# Encoder / decoder architecture and additional loss options
v_patch_nums:
  - 16
enc_type: dinov2
dec_type: dinov2
semantic_guide: dinov2
num_latent_tokens: 256
encoder_model: vit_base_patch14_dinov2.lvd142m
decoder_model: vit_base_patch14_dinov2.lvd142m
disc_adaptive_weight: true
abs_pos_embed: true
product_quant: 2
share_quant_resi: 4
codebook_drop: 0.1
half_sem: true
start_drop: 3
lecam_loss_weight: 0.001
sem_loss_weight: 0.1
enc_tuning_method: full
dec_tuning_method: full
clip_norm: false
sem_loss_scale: 1.0
config: configs/tokenizer.yaml
norm_type: bn
aug_prob: 1.0
aug_fade_steps: 0
disc_reinit: 0
debug_disc: false

# Distributed-run settings
# NOTE(review): rank / world_size / gpu look like values recorded from one
# particular launch rather than hand-tuned options -- confirm before reusing.
rank: 0
world_size: 32
gpu: 0
dist_url: 'env://'
distributed: true
dist_backend: nccl