diff --git a/exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/nvfp4_e2m1_plus_cs/checkpoint.pt b/exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/nvfp4_e2m1_plus_cs/checkpoint.pt new file mode 100644 index 0000000000000000000000000000000000000000..e54b777e4f849c3618b2e7e0db3b7f662c784a40 --- /dev/null +++ b/exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/nvfp4_e2m1_plus_cs/checkpoint.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1eeb3becf23eef7cf10ba771a341c1eaebf8ead9e71719398c6d2bccbad629fc +size 10798884178 diff --git a/exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/nvfp4_e2m1_plus_cs/checkpoint_.pt b/exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/nvfp4_e2m1_plus_cs/checkpoint_.pt new file mode 100644 index 0000000000000000000000000000000000000000..e54b777e4f849c3618b2e7e0db3b7f662c784a40 --- /dev/null +++ b/exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/nvfp4_e2m1_plus_cs/checkpoint_.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1eeb3becf23eef7cf10ba771a341c1eaebf8ead9e71719398c6d2bccbad629fc +size 10798884178 diff --git a/exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/nvfp4_e2m1_plus_cs/config.yaml b/exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/nvfp4_e2m1_plus_cs/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..478156e6e5dbffc24737e3650ddd339e6fd46f6b --- /dev/null +++ b/exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/nvfp4_e2m1_plus_cs/config.yaml @@ -0,0 +1,271 @@ +run_dir: exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/nvfp4_e2m1_plus_cs +seed: 0 +allow_tf32: true +timeout: null +resolution: 256 +amp: bf16 +cfg_scale: 1.0 +evaluate_split: test +eval_dir_name: null +num_save_images: 64 +save_all_images: false +save_image_format: jpg +save_images_at_all_procs: false +save_latent_samples: false +latent_samples_dir: null +evaluate_dataset: sample_class +sample_class: + name: SampleClass + batch_size: 128 + n_worker: 8 + drop_last: false + seed: 0 + shuffle: false + num_classes: 1000 + num_samples: 50000 +autoencoder: + num_settings: 1 + name: dc-ae-f32c32-in-1.0-256px + scaling_factor: 0.3285 + latent_channels: null +autoencoder_dtype: fp32 +eval_autoencoder_setting_list: null +model: fp8coat_dit +dit: + name: DiT + in_channels: 32 + input_size: 8 + cfg_channels: null + pretrained_path: null + pretrained_source: dc-ae + train_scheduler: SiTSampler + eval_scheduler: ODE_heun2 + num_inference_steps: 30 + flow_shift: 3.0 + reverse_time: false + use_cads: false + cads_noise_scale: 0.1 + cads_mixing_factor: 1.0 + cads_tau_min: 0.2 + cads_tau_max: 0.9 + use_guidance_interval: false + guidance_t_min: 0.2 + guidance_t_max: 0.8 + count_nfe: false + patch_size: 1 + hidden_size: 1152 + depth: 28 + num_heads: 16 + mlp_ratio: 4.0 + post_norm: false + class_dropout_prob: 0.1 + num_classes: 1000 + learn_sigma: false + unconditional: false + use_checkpoint: true + adaptive_channel: false + adaptive_channel_share_weights: true + only_load_backbone: false + freeze_backbone: false +uvit: + name: UViT + in_channels: 4 + input_size: 32 + cfg_channels: null + pretrained_path: null + pretrained_source: dc-ae + train_scheduler: DPM_Solver + eval_scheduler: DPM_Solver + num_inference_steps: 30 + flow_shift: 3.0 + reverse_time: false + use_cads: false + cads_noise_scale: 0.1 + cads_mixing_factor: 1.0 + cads_tau_min: 0.2 + cads_tau_max: 0.9 + use_guidance_interval: false + guidance_t_min: 0.2 + guidance_t_max: 0.8 + count_nfe: false + patch_size: 2 + hidden_size: 1152 + depth: 28 + num_heads: 16 + mlp_ratio: 4.0 + mlp_time_embed: false + qkv_bias: false + act_layer: gelu + use_checkpoint: true + class_dropout_prob: 0.1 + num_classes: 1000 + attn_mode: null +sana_cls: + name: SanaCls + in_channels: 4 + input_size: 32 + cfg_channels: null + pretrained_path: null + pretrained_source: dc-ae + train_scheduler: SanaScheduler + eval_scheduler: SanaScheduler + num_inference_steps: 250 + flow_shift: 3.0 + reverse_time: false + use_cads: false + cads_noise_scale: 0.1 + cads_mixing_factor: 1.0 + cads_tau_min: 0.2 + cads_tau_max: 0.9 + use_guidance_interval: false + guidance_t_min: 0.2 + guidance_t_max: 0.8 + count_nfe: false + patch_size: 2 + hidden_size: 1152 + depth: 28 + num_heads: 16 + mlp_ratio: 4.0 + post_norm: false + class_dropout_prob: 0.1 + num_classes: 1000 + unconditional: false + use_checkpoint: true + only_load_backbone: false + freeze_backbone: false + learn_sigma: false +usana_cls: + name: USanaCls + in_channels: 4 + input_size: 32 + cfg_channels: null + pretrained_path: null + pretrained_source: dc-ae + train_scheduler: DPM_Solver + eval_scheduler: DPM_Solver + num_inference_steps: 30 + flow_shift: 3.0 + reverse_time: false + use_cads: false + cads_noise_scale: 0.1 + cads_mixing_factor: 1.0 + cads_tau_min: 0.2 + cads_tau_max: 0.9 + use_guidance_interval: false + guidance_t_min: 0.2 + guidance_t_max: 0.8 + count_nfe: false + patch_size: 2 + hidden_size: 1152 + depth: 28 + num_heads: 16 + mlp_ratio: 4.0 + mlp_time_embed: false + qkv_bias: false + act_layer: gelu + use_checkpoint: true + class_dropout_prob: 0.1 + num_classes: 1000 + num_training_steps: 1000 +fp8: + name: FP8DiT +fp8coat: + name: FP8COATDiT + qchoice: linear + symm: true + row_blocksize: -1 + col_blocksize: -1 + linear_row_blocksize: 1 + linear_col_blocksize: 16 + min_blockunit_row: -1 + min_blockunit_col: -1 + fabit: NVE2M1_plus + fwbit: NVE2M1_plus + babit: NVE2M1_plus + bwbit: NVE2M1_plus + bobit: NVE2M1_plus + epsilon: 1.0e-08 +compute_fid: true +fid: + save_path: null + ref_path: assets/data/fid/imagenet_train_256.npz + precision_recall_ref_path: assets/data/precision_recall/VIRTUAL_imagenet256.npy +compute_inception_score: true +inception_score: {} +compute_cmmd: true +cmmd: + save_path: null + ref_path: assets/data/cmmd/VIRTUAL_imagenet256.npy +verbose: false +train_dataset: latent_imagenet +latent_imagenet: + name: LatentImageNet + batch_size: 128 + n_worker: 8 + drop_last: true + seed: 0 + shuffle: true + data_dir: assets/data/latent/dc_ae_f32c32_in_1.0_256px/imagenet_256 +latent_mjhq: + name: LatentMJHQ + batch_size: 32 + n_worker: 8 + drop_last: true + seed: 0 + shuffle: true + data_dir: assets/data/latent/dc_ae_f32c32/mjhq_1024 +latent_ffhq: + name: LatentFFHQ + batch_size: 32 + n_worker: 8 + drop_last: true + seed: 0 + shuffle: true + data_dir: assets/data/latent/dc_ae_f32c32/ffhq_1024 +latent_mapillary_vistas: + name: LatentMapillaryVistas + batch_size: 32 + n_worker: 8 + drop_last: true + seed: 0 + shuffle: true + data_dir: assets/data/latent/dc_ae_f32c32/mapillary_vistas_2048 +latent_multiple_channel_imagenet: + name: LatentMultipleChannelImageNet + batch_size: 32 + n_worker: 8 + drop_last: true + seed: 0 + shuffle: true + dataset_sample_ratio: null + num_channels_list: null + data_dirs: + - assets/data/latent/dc_ae_f32c32/imagenet_512 +resume: true +resume_path: null +resume_schedule: true +num_epochs: null +max_steps: 500000 +clip_grad: null +num_store_images: 64 +save_checkpoint_steps: 1000 +eval_steps: 5000 +save_eval_checkpoint_steps: 5000 +optimizer: + name: adamw + lr: 0.0001 + warmup_lr: 0.0 + weight_decay: 0.0 + no_wd_keys: [] + betas: + - 0.9 + - 0.999 +lr_scheduler: + name: constant + warmup_steps: 1000 +log: true +wandb_entity: han2024 +wandb_project: dc_ae_diffusion +ema_decay: 0.9999 +ema_warmup_steps: 2000 +eval_ema: true diff --git a/exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/nvfp4_e2m1_plus_cs/eval_results.csv b/exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/nvfp4_e2m1_plus_cs/eval_results.csv new file mode 100644 index 0000000000000000000000000000000000000000..59d03f6add6e896ba9b5984ec0ba90ba4b6aaa61 --- /dev/null +++ b/exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/nvfp4_e2m1_plus_cs/eval_results.csv @@ -0,0 +1,66 @@ +,fid,precision,recall,inception_score_mean,inception_score_std,cmmd +step_100000_autoencoder_setting_0_cfg_1.0,33.510336098973255,0.5956599712371826,0.590499997138977,40.09199964690056,0.9867992039804704,0.7789134979248047 +step_10000_autoencoder_setting_0_cfg_1.0,73.90192494885156,0.3162199854850769,0.4894999861717224,16.27740383148511,0.277950106876481,1.399517059326172 +step_105000_autoencoder_setting_0_cfg_1.0,31.99571309674724,0.5994799733161926,0.5982999801635742,41.5446583047944,1.30438414051274,0.7535219192504883 +step_110000_autoencoder_setting_0_cfg_1.0,31.084778234585315,0.5984399914741516,0.601099967956543,42.81673014675415,1.5925164937267593,0.742793083190918 +step_115000_autoencoder_setting_0_cfg_1.0,30.33861512746074,0.6003199815750122,0.5972999930381775,43.67173277929832,1.539747753303912,0.7289648056030273 +step_120000_autoencoder_setting_0_cfg_1.0,29.81009036379396,0.6003999710083008,0.5971999764442444,44.458882738338936,1.4739077247965764,0.7200241088867188 +step_125000_autoencoder_setting_0_cfg_1.0,29.73945936977003,0.6030799746513367,0.606499969959259,44.74414435290523,1.5099839204816552,0.7243156433105469 +step_130000_autoencoder_setting_0_cfg_1.0,29.537084774585367,0.6041199564933777,0.6071999669075012,45.32032085365301,1.39568082828115,0.7216930389404297 +step_135000_autoencoder_setting_0_cfg_1.0,29.25035165062644,0.6070399880409241,0.6010000109672546,45.46803598120924,1.4409735875031866,0.7151365280151367 +step_140000_autoencoder_setting_0_cfg_1.0,28.90444763476665,0.6025800108909607,0.6018999814987183,45.597290577183394,1.354427855562807,0.705718994140625 +step_145000_autoencoder_setting_0_cfg_1.0,28.324674606056703,0.5902799963951111,0.6071999669075012,45.27874221411236,1.2957470338039971,0.6810426712036133 +step_150000_autoencoder_setting_0_cfg_1.0,28.60110797450659,0.593459963798523,0.60589998960495,44.78194086301391,1.0435081908498076,0.6895065307617188 +step_15000_autoencoder_setting_0_cfg_1.0,64.54487122268665,0.3500599861145019,0.5184000134468079,18.398179969907577,0.3522202054306349,1.2390613555908203 +step_155000_autoencoder_setting_0_cfg_1.0,31.95065719292052,0.5965200066566467,0.6033999919891357,42.916075059198,1.159331511129437,0.7718801498413086 +step_160000_autoencoder_setting_0_cfg_1.0,30.68422348806098,0.5986599922180176,0.5945999622344971,44.20955327758968,1.2711748786662864,0.7419586181640625 +step_165000_autoencoder_setting_0_cfg_1.0,33.47219299450978,0.581279993057251,0.5963000059127808,42.17644170524565,1.007066066336716,0.8236169815063477 +step_170000_autoencoder_setting_0_cfg_1.0,31.89318713646309,0.5908799767494202,0.5875999927520752,44.09051651132943,1.041214600139548,0.7936954498291016 +step_175000_autoencoder_setting_0_cfg_1.0,28.059225943542003,0.6118999719619751,0.5924000144004822,47.0833547422664,1.02520082053524,0.6825923919677734 +step_180000_autoencoder_setting_0_cfg_1.0,26.86359682215533,0.6198199987411499,0.5871999859809875,48.460707615613806,1.276621065770431,0.643610954284668 +step_185000_autoencoder_setting_0_cfg_1.0,23.19771411477757,0.6177999973297119,0.5781999826431274,52.37679030752452,1.3017335985986096,0.5334615707397461 +step_190000_autoencoder_setting_0_cfg_1.0,29.17611684625689,0.5356199741363525,0.5751999616622925,46.47262300749968,0.9186686723855764,0.6437301635742188 +step_195000_autoencoder_setting_0_cfg_1.0,29.543687644147496,0.5321199893951416,0.5625999569892883,46.76187865714515,1.1099872913116335,0.6268024444580078 +step_200000_autoencoder_setting_0_cfg_1.0,23.80048736942706,0.5858399868011475,0.5575000047683716,58.942239278429554,1.817286785028792,0.5916357040405273 +step_20000_autoencoder_setting_0_cfg_1.0,59.71597046734888,0.3647599816322326,0.5491999983787537,19.74359432040311,0.450373075317894,1.1576414108276367 +step_205000_autoencoder_setting_0_cfg_1.0,22.12023671548451,0.6092000007629395,0.5795999765396118,61.177534212409455,1.3219201412613395,0.5767345428466797 +step_210000_autoencoder_setting_0_cfg_1.0,24.4272912625878,0.6003199815750122,0.5756999850273132,55.205168250673225,1.5699953835294218,0.5776882171630859 +step_215000_autoencoder_setting_0_cfg_1.0,26.145530498882465,0.5882599949836731,0.5619999766349792,52.5792564803152,0.9331772645925596,0.583648681640625 +step_220000_autoencoder_setting_0_cfg_1.0,33.717075840811106,0.5693599581718445,0.5706999897956848,41.55646637019326,0.7472119125007876,0.6976127624511719 +step_225000_autoencoder_setting_0_cfg_1.0,37.75270042779056,0.5680400133132935,0.5640000104904175,37.35837407110965,0.9174423982675206,0.7526874542236328 +step_230000_autoencoder_setting_0_cfg_1.0,41.80794911337364,0.5369600057601929,0.5568000078201294,34.946629068675506,0.7575486856087463,0.8826255798339844 +step_235000_autoencoder_setting_0_cfg_1.0,35.97322463757547,0.5503199696540833,0.5562999844551086,40.24203779016634,1.1283857749401949,0.7938146591186523 +step_240000_autoencoder_setting_0_cfg_1.0,35.63028665616923,0.5603799819946289,0.5485000014305115,41.24571541034057,1.0281156165647911,0.7764101028442383 +step_245000_autoencoder_setting_0_cfg_1.0,30.2209251885713,0.5829600095748901,0.5669999718666077,47.19632009902397,1.1179175794254577,0.6849765777587891 +step_250000_autoencoder_setting_0_cfg_1.0,28.80573283180962,0.6025999784469604,0.5428000092506409,48.640761477471074,0.8757591899906153,0.6220340728759766 +step_25000_autoencoder_setting_0_cfg_1.0,57.00759901774785,0.3761000037193298,0.5557999610900879,20.628880517156603,0.3562785811510151,1.1113882064819336 +step_255000_autoencoder_setting_0_cfg_1.0,31.98233728231185,0.5830399990081787,0.5428000092506409,43.99541556516653,1.0652455905332407,0.6737709045410156 +step_260000_autoencoder_setting_0_cfg_1.0,31.68354833974729,0.5862199664115906,0.552299976348877,43.2768589576001,0.9986364453619164,0.6620883941650391 +step_265000_autoencoder_setting_0_cfg_1.0,31.50612363132126,0.5783999562263489,0.5652999877929688,43.98134720708489,0.9954869364396132,0.681757926940918 +step_270000_autoencoder_setting_0_cfg_1.0,31.68609210507236,0.571619987487793,0.5719999670982361,43.15291995516085,0.8036755546675325,0.6895065307617188 +step_275000_autoencoder_setting_0_cfg_1.0,29.74582904389871,0.585860013961792,0.5733999609947205,45.70878680122093,0.9328789651107372,0.6787776947021484 +step_280000_autoencoder_setting_0_cfg_1.0,28.08477422446685,0.5943199992179871,0.5595999956130981,47.97159710271989,0.934454655944602,0.6558895111083984 +step_285000_autoencoder_setting_0_cfg_1.0,37.16351652347868,0.5593599677085876,0.5462999939918518,38.821064409111735,0.8984244883954947,0.7899999618530273 +step_290000_autoencoder_setting_0_cfg_1.0,35.13686225649491,0.5741400122642517,0.5638999938964844,39.28087561670991,1.1236658537717517,0.7501840591430664 +step_295000_autoencoder_setting_0_cfg_1.0,33.163526727613714,0.5811600089073181,0.5708000063896179,41.46022925753859,1.0585913435556151,0.7112026214599609 +step_300000_autoencoder_setting_0_cfg_1.0,38.86992745092755,0.5604599714279175,0.5561000108718872,36.608194517420586,0.7411830008857224,0.8028745651245117 +step_30000_autoencoder_setting_0_cfg_1.0,55.00772555796266,0.3881599903106689,0.5625,21.4777448838741,0.3303526685224319,1.0881423950195312 +step_305000_autoencoder_setting_0_cfg_1.0,40.654731308000805,0.5505399703979492,0.5654999613761902,34.86966265090955,0.664410489582783,0.8350610733032227 +step_310000_autoencoder_setting_0_cfg_1.0,37.667755599753264,0.5451200008392334,0.560699999332428,36.70080426804611,0.8777978541854278,0.7804632186889648 +step_315000_autoencoder_setting_0_cfg_1.0,41.79937248106677,0.5360599756240845,0.5679999589920044,33.45560467093258,0.8521028202931553,0.8482933044433594 +step_320000_autoencoder_setting_0_cfg_1.0,51.47804312534464,0.4896599948406219,0.5291000008583069,28.043407163850866,0.4270782284876955,1.0304450988769531 +step_325000_autoencoder_setting_0_cfg_1.0,48.228641188953134,0.495419979095459,0.5407999753952026,28.574792479385394,0.40829275689333416,0.9474754333496094 +step_35000_autoencoder_setting_0_cfg_1.0,53.2382308378962,0.4078599810600281,0.5694999694824219,22.289185030567616,0.3048117845165478,1.0652542114257812 +step_40000_autoencoder_setting_0_cfg_1.0,51.75855118114072,0.4341999888420105,0.576200008392334,23.356298751607465,0.4521947034097131,1.049637794494629 +step_45000_autoencoder_setting_0_cfg_1.0,50.30115830364991,0.4549599885940552,0.5882999897003174,24.55035109111102,0.5621372832031544,1.0350942611694336 +step_50000_autoencoder_setting_0_cfg_1.0,48.415261502685325,0.4780399799346924,0.5922999978065491,25.98636236345556,0.6534759089226885,1.019001007080078 +step_5000_autoencoder_setting_0_cfg_1.0,89.0873257052084,0.2642599940299988,0.4005999863147735,13.879401738049452,0.281824832227157,1.6162395477294922 +step_55000_autoencoder_setting_0_cfg_1.0,46.70711673379924,0.5,0.5934999585151672,27.346334763245828,0.7571752924388021,0.9958744049072266 +step_60000_autoencoder_setting_0_cfg_1.0,45.71089146050844,0.5166000127792358,0.5879999995231628,28.46846628558452,0.8322938709885304,0.982046127319336 +step_65000_autoencoder_setting_0_cfg_1.0,45.76202067657903,0.5302199721336365,0.5907999873161316,28.90117178672436,0.7314161581733065,0.9901523590087892 +step_70000_autoencoder_setting_0_cfg_1.0,46.257953874742896,0.5351399779319763,0.5892999768257141,29.143563631670304,0.6718447316445654,1.000165939331055 +step_75000_autoencoder_setting_0_cfg_1.0,45.70217787873264,0.5448399782180786,0.5879999995231628,29.95001729137772,0.6204188052274286,0.9901523590087892 +step_80000_autoencoder_setting_0_cfg_1.0,43.23189977289189,0.5568000078201294,0.5814999938011169,31.8562303396425,0.6517948737403826,0.9478330612182616 +step_85000_autoencoder_setting_0_cfg_1.0,40.700212981158586,0.5700399875640869,0.5794000029563904,33.96066291911782,0.7829944264755757,0.9067058563232422 +step_90000_autoencoder_setting_0_cfg_1.0,37.9045656958437,0.5823400020599365,0.5852000117301941,36.2067636052034,0.8642681408317301,0.859379768371582 +step_95000_autoencoder_setting_0_cfg_1.0,35.42743405262286,0.5922600030899048,0.5907999873161316,38.43274352690076,0.9939473698385852,0.8138418197631836 diff --git a/exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/nvfp4_e2m1_plus_cs/log.txt b/exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/nvfp4_e2m1_plus_cs/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..5deadbcf6857268652237a3951d6308261486a80 --- /dev/null +++ b/exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/nvfp4_e2m1_plus_cs/log.txt @@ -0,0 +1,3494 @@ +run_dir: exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/nvfp4_e2m1_plus_cscan not find a checkpoint, will train from scratch + Train Epoch #1: 0%| | 0/1251 [00:00