add data
Browse files- README.md +4 -0
- checkpoints/epoch_latest.pt +3 -0
- eval_results.jsonl +40 -0
- params.txt +91 -0
README.md
CHANGED
@@ -1,3 +1,7 @@
|
|
1 |
---
|
2 |
license: apache-2.0
|
3 |
---
|
|
|
|
|
|
|
|
|
|
1 |
---
|
2 |
license: apache-2.0
|
3 |
---
|
4 |
+
|
5 |
+
A ViT-B/32 CLIP model trained for 4 epochs on the [ye-pop](https://huggingface.co/datasets/Ejafa/ye-pop) dataset (491,520 images with detailed captions generated by [CogVLM](https://huggingface.co/THUDM/cogvlm-chat-hf)). It is a research artifact of [clip-synthetic-captions](https://github.com/nopperl/clip-synthetic-captions). On the [DataComp benchmark suite](https://datacomp.ai) (38 image classification and retrieval tasks), it outperforms the CLIP model trained using the original alt-texts.
|
6 |
+
|
7 |
+
Note: this model is likely not directly useful, as it is severely undertrained.
|
checkpoints/epoch_latest.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2c93b1d0f579ba264147bb8f9773cbc21980b9f58b1a3dc8f14c071de7ed0d9c
|
3 |
+
size 1815639097
|
eval_results.jsonl
ADDED
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{"key": "vtab/caltech101", "dataset": "Caltech-101", "metrics": {"acc1": 0.007395234182415777, "acc5": 0.0657354149548069, "mean_per_class_recall": 0.014377081680176563, "main_metric": 0.014377081680176563}}
|
2 |
+
{"key": "cifar10", "dataset": "CIFAR-10", "metrics": {"acc1": 0.155, "acc5": 0.6493, "mean_per_class_recall": 0.155, "main_metric": 0.155}}
|
3 |
+
{"key": "vtab/cifar100", "dataset": "CIFAR-100", "metrics": {"acc1": 0.0218, "acc5": 0.0994, "mean_per_class_recall": 0.021799999999999996, "main_metric": 0.0218}}
|
4 |
+
{"key": "vtab/clevr_count_all", "dataset": "CLEVR Counts", "metrics": {"acc1": 0.12073333333333333, "acc5": 0.6608, "mean_per_class_recall": 0.12493890664444576, "main_metric": 0.12073333333333333}}
|
5 |
+
{"key": "vtab/clevr_closest_object_distance", "dataset": "CLEVR Distance", "metrics": {"acc1": 0.21533333333333332, "acc5": 0.9186666666666666, "mean_per_class_recall": 0.1824056456789563, "main_metric": 0.21533333333333332}}
|
6 |
+
{"key": "country211", "dataset": "Country211", "metrics": {"acc1": 0.004881516587677725, "acc5": 0.023696682464454975, "mean_per_class_recall": 0.004881516587677725, "main_metric": 0.004881516587677725}}
|
7 |
+
{"key": "vtab/dtd", "dataset": "Describable Textures", "metrics": {"acc1": 0.02021276595744681, "acc5": 0.13829787234042554, "mean_per_class_recall": 0.02021276595744681, "main_metric": 0.02021276595744681}}
|
8 |
+
{"key": "vtab/eurosat", "dataset": "EuroSAT", "metrics": {"acc1": 0.13574074074074075, "acc5": 0.5422222222222223, "mean_per_class_recall": 0.1240712596764975, "main_metric": 0.13574074074074075}}
|
9 |
+
{"key": "fgvc_aircraft", "dataset": "FGVC Aircraft", "metrics": {"acc1": 0.012601260126012601, "acc5": 0.05670567056705671, "mean_per_class_recall": 0.012531194295900177, "main_metric": 0.012531194295900177}}
|
10 |
+
{"key": "food101", "dataset": "Food-101", "metrics": {"acc1": 0.014495049504950496, "acc5": 0.06071287128712871, "mean_per_class_recall": 0.014495049504950494, "main_metric": 0.014495049504950496}}
|
11 |
+
{"key": "gtsrb", "dataset": "GTSRB", "metrics": {"acc1": 0.040142517814726844, "acc5": 0.14513064133016626, "mean_per_class_recall": 0.04576512382124111, "main_metric": 0.040142517814726844}}
|
12 |
+
{"key": "imagenet1k", "dataset": "ImageNet 1k", "metrics": {"acc1": 0.00184, "acc5": 0.01028, "mean_per_class_recall": 0.00184, "main_metric": 0.00184}}
|
13 |
+
{"key": "imagenet_sketch", "dataset": "ImageNet Sketch", "metrics": {"acc1": 0.0010218318300615065, "acc5": 0.005639725677454853, "mean_per_class_recall": 0.0010309803921568627, "main_metric": 0.0010218318300615065}}
|
14 |
+
{"key": "imagenetv2", "dataset": "ImageNet v2", "metrics": {"acc1": 0.0019, "acc5": 0.011, "mean_per_class_recall": 0.0019, "main_metric": 0.0019}}
|
15 |
+
{"key": "imagenet-a", "dataset": "ImageNet-A", "metrics": {"acc1": 0.009066666666666667, "acc5": 0.035333333333333335, "mean_per_class_recall": 0.008459658556400289, "main_metric": 0.009066666666666667}}
|
16 |
+
{"key": "imagenet-o", "dataset": "ImageNet-O", "metrics": {"acc1": 0.0115, "acc5": 0.05, "mean_per_class_recall": 0.01450036075036075, "main_metric": 0.0115}}
|
17 |
+
{"key": "imagenet-r", "dataset": "ImageNet-R", "metrics": {"acc1": 0.0077666666666666665, "acc5": 0.0348, "mean_per_class_recall": 0.008743951595344196, "main_metric": 0.0077666666666666665}}
|
18 |
+
{"key": "vtab/kitti_closest_vehicle_distance", "dataset": "KITTI Vehicle Distance", "metrics": {"acc1": 0.2419127988748242, "acc5": null, "mean_per_class_recall": 0.19732690095627242, "main_metric": 0.2419127988748242}}
|
19 |
+
{"key": "mnist", "dataset": "MNIST", "metrics": {"acc1": 0.0945, "acc5": 0.546, "mean_per_class_recall": 0.09640987178738206, "main_metric": 0.0945}}
|
20 |
+
{"key": "objectnet", "dataset": "ObjectNet", "metrics": {"acc1": 0.010983094648433294, "acc5": 0.05636911812210617, "mean_per_class_recall": 0.011252573841817244, "main_metric": 0.010983094648433294}}
|
21 |
+
{"key": "vtab/flowers", "dataset": "Oxford Flowers-102", "metrics": {"acc1": 0.01203447715075622, "acc5": 0.047162140185396, "mean_per_class_recall": 0.013371982560288945, "main_metric": 0.013371982560288945}}
|
22 |
+
{"key": "vtab/pets", "dataset": "Oxford-IIIT Pet", "metrics": {"acc1": 0.03243390569637503, "acc5": 0.15072226764786045, "mean_per_class_recall": 0.0322690257152702, "main_metric": 0.0322690257152702}}
|
23 |
+
{"key": "voc2007", "dataset": "Pascal VOC 2007", "metrics": {"acc1": 0.24899839743589744, "acc5": 0.5396634615384616, "mean_per_class_recall": 0.08491109819493338, "main_metric": 0.24899839743589744}}
|
24 |
+
{"key": "vtab/pcam", "dataset": "PatchCamelyon", "metrics": {"acc1": 0.57305908203125, "acc5": null, "mean_per_class_recall": 0.5730006436893029, "main_metric": 0.57305908203125}}
|
25 |
+
{"key": "renderedsst2", "dataset": "Rendered SST2", "metrics": {"acc1": 0.49917627677100496, "acc5": null, "mean_per_class_recall": 0.5, "main_metric": 0.49917627677100496}}
|
26 |
+
{"key": "vtab/resisc45", "dataset": "RESISC45", "metrics": {"acc1": 0.06333333333333334, "acc5": 0.1734920634920635, "mean_per_class_recall": 0.06383441102258591, "main_metric": 0.06333333333333334}}
|
27 |
+
{"key": "cars", "dataset": "Stanford Cars", "metrics": {"acc1": 0.006839945280437756, "acc5": 0.02847904489491357, "mean_per_class_recall": 0.006420407665996903, "main_metric": 0.006839945280437756}}
|
28 |
+
{"key": "stl10", "dataset": "STL-10", "metrics": {"acc1": 0.194625, "acc5": 0.686, "mean_per_class_recall": 0.194625, "main_metric": 0.194625}}
|
29 |
+
{"key": "sun397", "dataset": "SUN397", "metrics": {"acc1": 0.009967449473122827, "acc5": 0.04595693031980433, "mean_per_class_recall": 0.008719524869680988, "main_metric": 0.009967449473122827}}
|
30 |
+
{"key": "vtab/svhn", "dataset": "SVHN", "metrics": {"acc1": 0.11954517516902274, "acc5": 0.5512830362630609, "mean_per_class_recall": 0.10147324314916, "main_metric": 0.11954517516902274}}
|
31 |
+
{"key": "retrieval/flickr_1k_test_image_text_retrieval", "dataset": "Flickr", "metrics": {"image_retrieval_recall@1": 0.006200000178068876, "text_retrieval_recall@1": 0.006000000052154064, "image_retrieval_recall@5": 0.017799999564886093, "text_retrieval_recall@5": 0.024000000208616257, "image_retrieval_recall@10": 0.032999999821186066, "text_retrieval_recall@10": 0.03999999910593033, "mean_recall@1": 0.00610000011511147, "main_metric": 0.00610000011511147}}
|
32 |
+
{"key": "retrieval/mscoco_2014_5k_test_image_text_retrieval", "dataset": "MSCOCO", "metrics": {"image_retrieval_recall@1": 0.0011195521801710129, "text_retrieval_recall@1": 0.0020000000949949026, "image_retrieval_recall@5": 0.0044382247142493725, "text_retrieval_recall@5": 0.009600000455975533, "image_retrieval_recall@10": 0.008956417441368103, "text_retrieval_recall@10": 0.01600000075995922, "mean_recall@1": 0.0015597761375829577, "main_metric": 0.0015597761375829577}}
|
33 |
+
{"key": "misc/winogavil", "dataset": "WinoGAViL", "metrics": {"avg_jaccard_score": 0.2668337275971292, "jaccard_score_5": 0.33416666666666667, "jaccard_score_6": 0.28502519798416126, "jaccard_score_10": 0.14167225575676282, "jaccard_score_12": 0.12471072541165998, "jaccard_score_5-6": 0.3089700996677741, "jaccard_score_10-12": 0.13317162930746068, "main_metric": 0.13317162930746068}}
|
34 |
+
{"key": "wilds/iwildcam", "dataset": "iWildCam", "metrics": {"acc1": 0.0014021640064499545, "acc5": 0.025005258115024187, "mean_per_class_recall": 0.00026523055229484515, "acc_avg": 0.001402163994498551, "recall-macro_all": 0.00026523055229484515, "F1-macro_all": 0.00010265793090965496, "main_metric": 0.00010265793090965496}}
|
35 |
+
{"key": "wilds/camelyon17", "dataset": "Camelyon17", "metrics": {"acc1": 0.47007783290615374, "acc5": null, "mean_per_class_recall": 0.4700778329061538, "acc_avg": 0.47007784247398376, "acc_slide:0": NaN, "count_slide:0": 0.0, "acc_slide:1": NaN, "count_slide:1": 0.0, "acc_slide:2": NaN, "count_slide:2": 0.0, "acc_slide:3": NaN, "count_slide:3": 0.0, "acc_slide:4": NaN, "count_slide:4": 0.0, "acc_slide:5": NaN, "count_slide:5": 0.0, "acc_slide:6": NaN, "count_slide:6": 0.0, "acc_slide:7": NaN, "count_slide:7": 0.0, "acc_slide:8": NaN, "count_slide:8": 0.0, "acc_slide:9": NaN, "count_slide:9": 0.0, "acc_slide:10": NaN, "count_slide:10": 0.0, "acc_slide:11": NaN, "count_slide:11": 0.0, "acc_slide:12": NaN, "count_slide:12": 0.0, "acc_slide:13": NaN, "count_slide:13": 0.0, "acc_slide:14": NaN, "count_slide:14": 0.0, "acc_slide:15": NaN, "count_slide:15": 0.0, "acc_slide:16": NaN, "count_slide:16": 0.0, "acc_slide:17": NaN, "count_slide:17": 0.0, "acc_slide:18": NaN, "count_slide:18": 0.0, "acc_slide:19": NaN, "count_slide:19": 0.0, "acc_slide:20": 0.887139081954956, "count_slide:20": 3810.0, "acc_slide:21": 0.5923118591308594, "count_slide:21": 3694.0, "acc_slide:22": 0.4149791896343231, "count_slide:22": 7210.0, "acc_slide:23": 0.5654311776161194, "count_slide:23": 5288.0, "acc_slide:24": 0.3874725103378296, "count_slide:24": 7727.0, "acc_slide:25": 0.6892016530036926, "count_slide:25": 4334.0, "acc_slide:26": 0.46343380212783813, "count_slide:26": 3815.0, "acc_slide:27": 0.6389376521110535, "count_slide:27": 4556.0, "acc_slide:28": 0.3817993700504303, "count_slide:28": 31878.0, "acc_slide:29": 0.4395699203014374, "count_slide:29": 12742.0, "acc_wg": 0.3817993700504303, "main_metric": 0.47007783290615374}}
|
36 |
+
{"key": "wilds/fmow", "dataset": "FMoW", "metrics": {"acc1": 0.018228695494843496, "acc5": 0.10638682829744889, "mean_per_class_recall": 0.016988871136046975, "acc_avg": 0.018228694796562195, "acc_year:0": NaN, "count_year:0": 0.0, "acc_year:1": NaN, "count_year:1": 0.0, "acc_year:2": NaN, "count_year:2": 0.0, "acc_year:3": NaN, "count_year:3": 0.0, "acc_year:4": NaN, "count_year:4": 0.0, "acc_year:5": NaN, "count_year:5": 0.0, "acc_year:6": NaN, "count_year:6": 0.0, "acc_year:7": NaN, "count_year:7": 0.0, "acc_year:8": NaN, "count_year:8": 0.0, "acc_year:9": NaN, "count_year:9": 0.0, "acc_year:10": NaN, "count_year:10": 0.0, "acc_year:11": NaN, "count_year:11": 0.0, "acc_year:12": NaN, "count_year:12": 0.0, "acc_year:13": NaN, "count_year:13": 0.0, "acc_year:14": 0.01936211623251438, "count_year:14": 15959.0, "acc_year:15": 0.015287038870155811, "count_year:15": 6149.0, "acc_worst_year": 0.015287038870155811, "acc_region:0": 0.017731210216879845, "count_region:0": 4963.0, "acc_region:1": 0.02014339342713356, "count_region:1": 5858.0, "acc_region:2": 0.023524874821305275, "count_region:2": 2593.0, "acc_region:3": 0.014456629753112793, "count_region:3": 8024.0, "acc_region:4": 0.030030030757188797, "count_region:4": 666.0, "acc_region:5": 0.0, "count_region:5": 4.0, "acc_worst_region": 0.0, "main_metric": 0.0}}
|
37 |
+
{"key": "fairness/dollar_street", "dataset": "Dollar Street", "metrics": {"acc1": 0.018270054239223524, "acc5": 0.09391949757350843, "mean_per_class_recall": 0.02224291678408788, "acc_top5_avg": 0.09391950070858002, "acc_top5_income_ds:0": 0.07827103137969971, "count_income_ds:0": 856.0, "acc_top5_income_ds:1": 0.0984162911772728, "count_income_ds:1": 884.0, "acc_top5_income_ds:2": 0.10321864485740662, "count_income_ds:2": 901.0, "acc_top5_income_ds:3": 0.09512761235237122, "count_income_ds:3": 862.0, "acc_top5_wg": 0.07827103137969971, "main_metric": 0.07827103137969971}}
|
38 |
+
{"key": "fairness/geode", "dataset": "GeoDE", "metrics": {"acc1": 0.04660474055092889, "acc5": 0.22653747597693785, "mean_per_class_recall": 0.04590169905627121, "acc_avg": 0.046604741364717484, "acc_region:0": 0.04759916663169861, "count_region:0": 2395.0, "acc_region:1": 0.04925373196601868, "count_region:1": 2010.0, "acc_region:2": 0.04092191904783249, "count_region:2": 2126.0, "acc_region:3": 0.04930662736296654, "count_region:3": 1947.0, "acc_region:4": 0.05008537322282791, "count_region:4": 1757.0, "acc_region:5": 0.04349755868315697, "count_region:5": 2253.0, "acc_wg": 0.04092191904783249, "main_metric": 0.04092191904783249}}
|
39 |
+
{"key": "fairness/fairface", "dataset": "FairFace", "metrics": {"acc_race_avg": 0.8085630536079407, "acc_race_race_binary:0": 0.012470023706555367, "count_race_binary:0": 2085.0, "acc_race_race_binary:1": 0.9957154393196106, "count_race_binary:1": 8869.0, "acc_race_wg": 0.012470023706555367, "acc_gender_avg": 0.5053861737251282, "acc_gender_race_binary:0": 0.5093525052070618, "acc_gender_race_binary:1": 0.504453718662262, "acc_gender_wg": 0.504453718662262, "acc_age_avg": 0.11730874329805374, "acc_age_race_binary:0": 0.13573141396045685, "acc_age_race_binary:1": 0.11297778785228729, "acc_age_wg": 0.11297778785228729, "acc_gender_x_avg": 0.5053861737251282, "acc_gender_x_race:0_gender:0": 0.6996245384216309, "count_race:0_gender:0": 799.0, "acc_gender_x_race:0_gender:1": 0.25099074840545654, "count_race:0_gender:1": 757.0, "acc_gender_x_race:1_gender:0": 0.7228164076805115, "count_race:1_gender:0": 1122.0, "acc_gender_x_race:1_gender:1": 0.2606438100337982, "count_race:1_gender:1": 963.0, "acc_gender_x_race:2_gender:0": 0.7928286790847778, "count_race:2_gender:0": 753.0, "acc_gender_x_race:2_gender:1": 0.17824377119541168, "count_race:2_gender:1": 763.0, "acc_gender_x_race:3_gender:0": 0.7969735264778137, "count_race:3_gender:0": 793.0, "acc_gender_x_race:3_gender:1": 0.18313252925872803, "count_race:3_gender:1": 830.0, "acc_gender_x_race:4_gender:0": 0.739237368106842, "count_race:4_gender:0": 813.0, "acc_gender_x_race:4_gender:1": 0.21212121844291687, "count_race:4_gender:1": 396.0, "acc_gender_x_race:5_gender:0": 0.7700680494308472, "count_race:5_gender:0": 735.0, "acc_gender_x_race:5_gender:1": 0.2441176474094391, "count_race:5_gender:1": 680.0, "acc_gender_x_race:6_gender:0": 0.8030887842178345, "count_race:6_gender:0": 777.0, "acc_gender_x_race:6_gender:1": 0.21604140102863312, "count_race:6_gender:1": 773.0, "acc_gender_x_wg": 0.17824377119541168, "toxicity_crime_avg": 0.19344530999660492, "toxicity_crime_race:0": 0.13367609679698944, "count_race:0": 1556.0, 
"toxicity_crime_race:1": 0.18033573031425476, "count_race:1": 2085.0, "toxicity_crime_race:2": 0.17941953241825104, "count_race:2": 1516.0, "toxicity_crime_race:3": 0.23906345665454865, "count_race:3": 1623.0, "toxicity_crime_race:4": 0.1827957034111023, "count_race:4": 1209.0, "toxicity_crime_race:5": 0.22120141983032227, "count_race:5": 1415.0, "toxicity_crime_race:6": 0.2199999988079071, "count_race:6": 1550.0, "toxicity_crime_wg": 0.13367609679698944, "toxicity_nonhuman_avg": 0.1079057902097702, "toxicity_nonhuman_race:0": 0.06683804839849472, "toxicity_nonhuman_race:1": 0.14196643233299255, "toxicity_nonhuman_race:2": 0.10949867963790894, "toxicity_nonhuman_race:3": 0.10597658902406693, "toxicity_nonhuman_race:4": 0.09346567094326019, "toxicity_nonhuman_race:5": 0.10459364205598831, "toxicity_nonhuman_race:6": 0.1180645152926445, "toxicity_nonhuman_wg": 0.06683804839849472, "main_metric": null}}
|
40 |
+
{"key": "fairness/utkface", "dataset": "UTKFace", "metrics": {"acc_race_avg": 0.5749061107635498, "acc_race_race_binary:0": 0.03374354913830757, "count_race_binary:0": 10076.0, "acc_race_race_binary:1": 0.9750495553016663, "count_race_binary:1": 13627.0, "acc_race_wg": 0.03374354913830757, "acc_gender_avg": 0.4981648027896881, "acc_gender_race_binary:0": 0.5070464611053467, "acc_gender_race_binary:1": 0.49159756302833557, "acc_gender_wg": 0.49159756302833557, "acc_age_avg": 0.11061891168355942, "acc_age_race_binary:0": 0.13497419655323029, "acc_age_race_binary:1": 0.09261026233434677, "acc_age_wg": 0.09261026233434677, "acc_gender_x_avg": 0.4981648027896881, "acc_gender_x_race:0_gender:0": 0.8740293383598328, "count_race:0_gender:0": 2318.0, "acc_gender_x_race:0_gender:1": 0.08605072647333145, "count_race:0_gender:1": 2208.0, "acc_gender_x_race:1_gender:0": 0.8831263780593872, "count_race:1_gender:0": 5476.0, "acc_gender_x_race:1_gender:1": 0.05934782698750496, "count_race:1_gender:1": 4600.0, "acc_gender_x_race:2_gender:0": 0.9075630307197571, "count_race:2_gender:0": 2261.0, "acc_gender_x_race:2_gender:1": 0.05775962769985199, "count_race:2_gender:1": 1714.0, "acc_gender_x_race:3_gender:0": 0.925079345703125, "count_race:3_gender:0": 1575.0, "acc_gender_x_race:3_gender:1": 0.06562668085098267, "count_race:3_gender:1": 1859.0, "acc_gender_x_race:4_gender:0": 0.9236842393875122, "count_race:4_gender:0": 760.0, "acc_gender_x_race:4_gender:1": 0.054721031337976456, "count_race:4_gender:1": 932.0, "acc_gender_x_wg": 0.054721031337976456, "toxicity_crime_avg": 0.14048855006694794, "toxicity_crime_race:0": 0.1590808629989624, "count_race:0": 4526.0, "toxicity_crime_race:1": 0.11552203446626663, "count_race:1": 10076.0, "toxicity_crime_race:2": 0.18012578785419464, "count_race:2": 3975.0, "toxicity_crime_race:3": 0.13395456969738007, "count_race:3": 3434.0, "toxicity_crime_race:4": 0.1595744639635086, "count_race:4": 1692.0, "toxicity_crime_wg": 0.11552203446626663, 
"toxicity_nonhuman_avg": 0.2557482123374939, "toxicity_nonhuman_race:0": 0.12991604208946228, "toxicity_nonhuman_race:1": 0.32046446204185486, "toxicity_nonhuman_race:2": 0.23899370431900024, "toxicity_nonhuman_race:3": 0.2629586458206177, "toxicity_nonhuman_race:4": 0.23167848587036133, "toxicity_nonhuman_wg": 0.12991604208946228, "main_metric": null}}
|
params.txt
ADDED
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
accum_freq: 1
|
2 |
+
aug_cfg: {}
|
3 |
+
batch_size: 1024
|
4 |
+
beta1: 0.9
|
5 |
+
beta2: 0.98
|
6 |
+
checkpoint_path: $HOME/clip-synthetic-captions/output/ye-pop-cogvlm_caption/checkpoints
|
7 |
+
coca_caption_loss_weight: 2.0
|
8 |
+
coca_contrastive_loss_weight: 1.0
|
9 |
+
copy_codebase: False
|
10 |
+
csv_caption_key: title
|
11 |
+
csv_img_key: filepath
|
12 |
+
csv_separator:
|
13 |
+
dataset_resampled: False
|
14 |
+
dataset_type: webdataset
|
15 |
+
ddp_static_graph: True
|
16 |
+
debug: False
|
17 |
+
delete_previous_checkpoint: False
|
18 |
+
device: cuda:0
|
19 |
+
dist_backend: nccl
|
20 |
+
dist_url: env://
|
21 |
+
distill: False
|
22 |
+
distill_model: None
|
23 |
+
distill_pretrained: None
|
24 |
+
distributed: True
|
25 |
+
epochs: 4
|
26 |
+
epochs_cooldown: None
|
27 |
+
eps: 1e-06
|
28 |
+
force_custom_text: False
|
29 |
+
force_image_size: None
|
30 |
+
force_patch_dropout: None
|
31 |
+
force_quick_gelu: False
|
32 |
+
gather_with_grad: True
|
33 |
+
grad_checkpointing: True
|
34 |
+
grad_clip_norm: None
|
35 |
+
horovod: False
|
36 |
+
image_mean: None
|
37 |
+
image_std: None
|
38 |
+
imagenet_v2: None
|
39 |
+
imagenet_val: None
|
40 |
+
local_loss: True
|
41 |
+
local_rank: 0
|
42 |
+
lock_image: False
|
43 |
+
lock_image_freeze_bn_stats: False
|
44 |
+
lock_image_unlocked_groups: 0
|
45 |
+
lock_text: False
|
46 |
+
lock_text_freeze_layer_norm: False
|
47 |
+
lock_text_unlocked_layers: 0
|
48 |
+
log_every_n_steps: 100
|
49 |
+
log_level: 20
|
50 |
+
log_local: False
|
51 |
+
log_path: $HOME/clip-synthetic-captions/output/ye-pop-cogvlm_caption/out.log
|
52 |
+
logs: $HOME/clip-synthetic-captions/output
|
53 |
+
lr: 0.0005
|
54 |
+
lr_cooldown_end: 0.0
|
55 |
+
lr_cooldown_power: 1.0
|
56 |
+
lr_scheduler: cosine
|
57 |
+
model: ViT-B-32
|
58 |
+
name: ye-pop-cogvlm_caption
|
59 |
+
no_set_device_rank: False
|
60 |
+
precision: amp
|
61 |
+
pretrained:
|
62 |
+
pretrained_image: False
|
63 |
+
rank: 0
|
64 |
+
remote_sync: None
|
65 |
+
remote_sync_frequency: 300
|
66 |
+
remote_sync_protocol: s3
|
67 |
+
report_to:
|
68 |
+
resume: None
|
69 |
+
save_frequency: 0
|
70 |
+
save_most_recent: True
|
71 |
+
seed: 0
|
72 |
+
skip_scheduler: False
|
73 |
+
tensorboard: False
|
74 |
+
tensorboard_path:
|
75 |
+
torchscript: False
|
76 |
+
trace: False
|
77 |
+
train_data: $HOME/clip-synthetic-captions/data/postprocessed/ye-pop-img2dataset-cogvlm_caption/shards/{00000000..00000049}.tar
|
78 |
+
train_data_upsampling_factors: None
|
79 |
+
train_num_samples: 122880
|
80 |
+
use_bn_sync: False
|
81 |
+
val_data: None
|
82 |
+
val_frequency: 1
|
83 |
+
val_num_samples: None
|
84 |
+
wandb: False
|
85 |
+
wandb_notes:
|
86 |
+
wandb_project_name: open-clip
|
87 |
+
warmup: 500
|
88 |
+
wd: 0.2
|
89 |
+
workers: 2
|
90 |
+
world_size: 4
|
91 |
+
zeroshot_frequency: 2
|