hassonofer committed on
Commit
6561a40
·
verified ·
1 Parent(s): b6ee6f9

Upload training_args.json

Browse files
Files changed (1) hide show
  1. training_args.json +106 -0
training_args.json ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cmdline": "train.py --network flexivit_reg1_s16_rms_ls --tag dino-v2-il-all --opt adamw --lr 0.0004 --lr-scheduler-update iter --lr-scheduler cosine --lr-cosine-min 1e-7 --batch-size 128 --warmup-epochs 10 --epochs 100 --size 240 --wd 0.05 --norm-wd 0 --grad-accum-steps 4 --smoothing-alpha 0.1 --mixup-alpha 0.8 --cutmix --aug-level 8 --model-ema --ra-sampler --ra-reps 2 --clip-grad-norm 1 --model-config min_patch_size=10,max_patch_size=40 --amp --amp-dtype bfloat16 --compile --rgb-mode none --layer-decay 0.7 --resume-epoch 0 --data-path data/training_il-all_packed --val-path data/validation_il-all_packed",
3
+ "network": "flexivit_reg1_s16_rms_ls",
4
+ "net_param": null,
5
+ "model_config": {
6
+ "min_patch_size": 10,
7
+ "max_patch_size": 40
8
+ },
9
+ "pretrained": false,
10
+ "reset_head": false,
11
+ "freeze_body": false,
12
+ "freeze_stages": null,
13
+ "unfreeze_features": false,
14
+ "compile": true,
15
+ "compile_opt": false,
16
+ "opt": "adamw",
17
+ "momentum": 0.9,
18
+ "nesterov": false,
19
+ "opt_eps": null,
20
+ "opt_betas": null,
21
+ "opt_alpha": null,
22
+ "lr": 0.0004,
23
+ "bias_lr": null,
24
+ "lr_scale": null,
25
+ "lr_scale_type": "linear",
26
+ "wd": 0.05,
27
+ "norm_wd": 0.0,
28
+ "bias_weight_decay": null,
29
+ "transformer_embedding_decay": null,
30
+ "layer_decay": 0.7,
31
+ "lr_scheduler_update": "iter",
32
+ "lr_scheduler": "cosine",
33
+ "lr_step_size": 40,
34
+ "lr_steps": null,
35
+ "lr_step_gamma": 0.75,
36
+ "lr_cosine_min": 1e-07,
37
+ "lr_power": 1.0,
38
+ "grad_accum_steps": 4,
39
+ "channels": 3,
40
+ "size": [
41
+ 240,
42
+ 240
43
+ ],
44
+ "freeze_bn": false,
45
+ "sync_bn": false,
46
+ "batch_size": 128,
47
+ "warmup_epochs": 10,
48
+ "smoothing_alpha": 0.1,
49
+ "mixup_alpha": 0.8,
50
+ "cutmix": true,
51
+ "aug_type": "birder",
52
+ "aug_level": 8,
53
+ "use_grayscale": false,
54
+ "ra_num_ops": 2,
55
+ "ra_magnitude": 9,
56
+ "augmix_severity": 3,
57
+ "resize_min_scale": null,
58
+ "re_prob": null,
59
+ "simple_crop": false,
60
+ "rgb_mode": "none",
61
+ "bce_loss": false,
62
+ "bce_threshold": 0.0,
63
+ "epochs": 100,
64
+ "stop_epoch": 101,
65
+ "save_frequency": 5,
66
+ "keep_last": null,
67
+ "resume_epoch": 0,
68
+ "load_states": false,
69
+ "load_scheduler": false,
70
+ "model_ema": true,
71
+ "model_ema_steps": 32,
72
+ "model_ema_decay": 0.9999,
73
+ "ra_sampler": true,
74
+ "ra_reps": 2,
75
+ "tag": "dino-v2-il-all",
76
+ "log_interval": 50,
77
+ "num_workers": 8,
78
+ "prefetch_factor": null,
79
+ "drop_last": false,
80
+ "model_dtype": "float32",
81
+ "amp": true,
82
+ "amp_dtype": "bfloat16",
83
+ "fast_matmul": false,
84
+ "grad_anomaly_detection": false,
85
+ "world_size": 2,
86
+ "dist_url": "env://",
87
+ "clip_grad_norm": 1.0,
88
+ "gpu": 0,
89
+ "cpu": false,
90
+ "use_deterministic_algorithms": false,
91
+ "plot_lr": false,
92
+ "no_summary": false,
93
+ "val_path": "data/validation_il-all_packed",
94
+ "data_path": "data/training_il-all_packed",
95
+ "wds": false,
96
+ "wds_info": null,
97
+ "wds_class_file": null,
98
+ "wds_cache_dir": null,
99
+ "wds_train_size": null,
100
+ "wds_val_size": null,
101
+ "wds_training_split": "training",
102
+ "wds_val_split": "validation",
103
+ "rank": 0,
104
+ "distributed": true,
105
+ "dist_backend": "nccl"
106
+ }