Upload training_args.json
Browse files- training_args.json +106 -0
training_args.json
ADDED
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cmdline": "train.py --network flexivit_reg1_s16_rms_ls --tag dino-v2-il-all --opt adamw --lr 0.0004 --lr-scheduler-update iter --lr-scheduler cosine --lr-cosine-min 1e-7 --batch-size 128 --warmup-epochs 10 --epochs 100 --size 240 --wd 0.05 --norm-wd 0 --grad-accum-steps 4 --smoothing-alpha 0.1 --mixup-alpha 0.8 --cutmix --aug-level 8 --model-ema --ra-sampler --ra-reps 2 --clip-grad-norm 1 --model-config min_patch_size=10,max_patch_size=40 --amp --amp-dtype bfloat16 --compile --rgb-mode none --layer-decay 0.7 --resume-epoch 0 --data-path data/training_il-all_packed --val-path data/validation_il-all_packed",
|
3 |
+
"network": "flexivit_reg1_s16_rms_ls",
|
4 |
+
"net_param": null,
|
5 |
+
"model_config": {
|
6 |
+
"min_patch_size": 10,
|
7 |
+
"max_patch_size": 40
|
8 |
+
},
|
9 |
+
"pretrained": false,
|
10 |
+
"reset_head": false,
|
11 |
+
"freeze_body": false,
|
12 |
+
"freeze_stages": null,
|
13 |
+
"unfreeze_features": false,
|
14 |
+
"compile": true,
|
15 |
+
"compile_opt": false,
|
16 |
+
"opt": "adamw",
|
17 |
+
"momentum": 0.9,
|
18 |
+
"nesterov": false,
|
19 |
+
"opt_eps": null,
|
20 |
+
"opt_betas": null,
|
21 |
+
"opt_alpha": null,
|
22 |
+
"lr": 0.0004,
|
23 |
+
"bias_lr": null,
|
24 |
+
"lr_scale": null,
|
25 |
+
"lr_scale_type": "linear",
|
26 |
+
"wd": 0.05,
|
27 |
+
"norm_wd": 0.0,
|
28 |
+
"bias_weight_decay": null,
|
29 |
+
"transformer_embedding_decay": null,
|
30 |
+
"layer_decay": 0.7,
|
31 |
+
"lr_scheduler_update": "iter",
|
32 |
+
"lr_scheduler": "cosine",
|
33 |
+
"lr_step_size": 40,
|
34 |
+
"lr_steps": null,
|
35 |
+
"lr_step_gamma": 0.75,
|
36 |
+
"lr_cosine_min": 1e-07,
|
37 |
+
"lr_power": 1.0,
|
38 |
+
"grad_accum_steps": 4,
|
39 |
+
"channels": 3,
|
40 |
+
"size": [
|
41 |
+
240,
|
42 |
+
240
|
43 |
+
],
|
44 |
+
"freeze_bn": false,
|
45 |
+
"sync_bn": false,
|
46 |
+
"batch_size": 128,
|
47 |
+
"warmup_epochs": 10,
|
48 |
+
"smoothing_alpha": 0.1,
|
49 |
+
"mixup_alpha": 0.8,
|
50 |
+
"cutmix": true,
|
51 |
+
"aug_type": "birder",
|
52 |
+
"aug_level": 8,
|
53 |
+
"use_grayscale": false,
|
54 |
+
"ra_num_ops": 2,
|
55 |
+
"ra_magnitude": 9,
|
56 |
+
"augmix_severity": 3,
|
57 |
+
"resize_min_scale": null,
|
58 |
+
"re_prob": null,
|
59 |
+
"simple_crop": false,
|
60 |
+
"rgb_mode": "none",
|
61 |
+
"bce_loss": false,
|
62 |
+
"bce_threshold": 0.0,
|
63 |
+
"epochs": 100,
|
64 |
+
"stop_epoch": 101,
|
65 |
+
"save_frequency": 5,
|
66 |
+
"keep_last": null,
|
67 |
+
"resume_epoch": 0,
|
68 |
+
"load_states": false,
|
69 |
+
"load_scheduler": false,
|
70 |
+
"model_ema": true,
|
71 |
+
"model_ema_steps": 32,
|
72 |
+
"model_ema_decay": 0.9999,
|
73 |
+
"ra_sampler": true,
|
74 |
+
"ra_reps": 2,
|
75 |
+
"tag": "dino-v2-il-all",
|
76 |
+
"log_interval": 50,
|
77 |
+
"num_workers": 8,
|
78 |
+
"prefetch_factor": null,
|
79 |
+
"drop_last": false,
|
80 |
+
"model_dtype": "float32",
|
81 |
+
"amp": true,
|
82 |
+
"amp_dtype": "bfloat16",
|
83 |
+
"fast_matmul": false,
|
84 |
+
"grad_anomaly_detection": false,
|
85 |
+
"world_size": 2,
|
86 |
+
"dist_url": "env://",
|
87 |
+
"clip_grad_norm": 1.0,
|
88 |
+
"gpu": 0,
|
89 |
+
"cpu": false,
|
90 |
+
"use_deterministic_algorithms": false,
|
91 |
+
"plot_lr": false,
|
92 |
+
"no_summary": false,
|
93 |
+
"val_path": "data/validation_il-all_packed",
|
94 |
+
"data_path": "data/training_il-all_packed",
|
95 |
+
"wds": false,
|
96 |
+
"wds_info": null,
|
97 |
+
"wds_class_file": null,
|
98 |
+
"wds_cache_dir": null,
|
99 |
+
"wds_train_size": null,
|
100 |
+
"wds_val_size": null,
|
101 |
+
"wds_training_split": "training",
|
102 |
+
"wds_val_split": "validation",
|
103 |
+
"rank": 0,
|
104 |
+
"distributed": true,
|
105 |
+
"dist_backend": "nccl"
|
106 |
+
}
|