data_path: /mnt/localssd/ImageNet2012/train | |
data_face_path: | |
cloud_save_path: output/exp-vq | |
no_local_save: false | |
vq_model: VQ-16 | |
vq_ckpt: | |
finetune: false | |
ema: true | |
codebook_size: 4096 | |
codebook_embed_dim: 32 | |
codebook_l2_norm: true | |
codebook_weight: 1.0 | |
entropy_loss_ratio: 0.1 | |
commit_loss_beta: 0.25 | |
reconstruction_weight: 1.0 | |
reconstruction_loss: l2 | |
perceptual_weight: 1.0 | |
disc_weight: 0.5 | |
disc_epoch_start: 56 | |
disc_start: 0 | |
disc_type: dinodisc | |
disc_loss: hinge | |
gen_loss: hinge | |
compile: false | |
dropout_p: 0.0 | |
results_dir: results_tokenizer_image | |
dataset: imagenet | |
image_size: 256 | |
epochs: 200 | |
lr: 3e-5 | |
disc_lr: 0.0001 | |
max_grad_norm: 0.0 | |
lr_scheduler: cosine | |
weight_decay: 0.0 | |
disc_weight_decay: 0.0005 | |
beta1: 0.9 | |
beta2: 0.95 | |
global_batch_size: 1024 | |
global_seed: 0 | |
num_workers: 16 | |
log_every: 100 | |
vis_every: 5000 | |
ckpt_every: 10000 | |
gradient_accumulation_steps: 1 | |
mixed_precision: bf16 | |
save_best: true | |
val_data_path: /mnt/localssd/ImageNet2012/val | |
sample_folder_dir: samples | |
reconstruction_folder_dir: reconstruction | |
v_patch_nums: | |
- 16 | |
enc_type: dinov2 | |
dec_type: dinov2 | |
semantic_guide: dinov2 | |
num_latent_tokens: 256 | |
encoder_model: vit_base_patch14_dinov2.lvd142m | |
decoder_model: vit_base_patch14_dinov2.lvd142m | |
disc_adaptive_weight: true | |
abs_pos_embed: true | |
product_quant: 2 | |
share_quant_resi: 4 | |
codebook_drop: 0.1 | |
half_sem: true | |
start_drop: 3 | |
lecam_loss_weight: 0.001 | |
sem_loss_weight: 0.1 | |
enc_tuning_method: full | |
dec_tuning_method: full | |
clip_norm: false | |
sem_loss_scale: 1.0 | |
config: configs/tokenizer.yaml | |
norm_type: bn | |
aug_prob: 1.0 | |
aug_fade_steps: 0 | |
disc_reinit: 0 | |
debug_disc: false | |
rank: 0 | |
world_size: 32 | |
gpu: 0 | |
dist_url: env:// | |
distributed: true | |
dist_backend: nccl | |