xintaozhen committed on
Commit
d70d5cd
·
verified ·
1 Parent(s): 5a15479

Upload 7 files

Browse files
models/minivla-vq-libero90-prismatic/.gitattributes ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ prism-qwen25-dinosiglip-224px+0_5b+mx-libero-90+n1+b32+x7--vq_extra_tokenizer.jsonl filter=lfs diff=lfs merge=lfs -text
models/minivla-vq-libero90-prismatic/README.md ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ tags:
4
+ - robotics
5
+ - vla
6
+ - image-text-to-text
7
+ - multimodal
8
+ - pretraining
9
+ license: mit
10
+ language:
11
+ - en
12
+ pipeline_tag: image-text-to-text
13
+ ---
14
+
15
+ # MiniVLA VQ 1B (Prismatic-Compatible Version)
16
+
17
+ <b>This checkpoint is in a format that is compatible with the training script from the original [Prismatic VLMs project codebase](https://github.com/TRI-ML/prismatic-vlms), which the OpenVLA
18
+ team built upon to develop the OpenVLA model.</b>
19
+
20
+ This Prismatic-compatible checkpoint may be useful if you wish to <b>fully fine-tune</b> MiniVLA (all 1 billion parameters) via native PyTorch Fully
21
+ Sharded Data Parallel (FSDP) using the Prismatic VLMs training script. If you instead wish to do Parameter-Efficient Fine-Tuning via LoRA, you
22
+ can use the MiniVLA checkpoint linked above, which is compatible with the Hugging Face `transformers` library. We recommend fine-tuning via LoRA if
23
+ you do not have sufficient compute to fully fine-tune a 1B-parameter model (e.g., multiple A100/H100 GPUs).
24
+
25
+ ## Usage Instructions
26
+
27
+ See the [MiniVLA GitHub README](https://github.com/Stanford-ILIAD/openvla-mini/blob/main/README.md) for instructions on how to use this checkpoint for full fine-tuning.
28
+
29
+ ## Citation
30
+
31
+ **BibTeX:**
32
+
33
+ ```bibtex
34
+ @article{belkhale24minivla,
35
+ title={MiniVLA: A Better VLA with a Smaller Footprint},
36
+ author={Suneel Belkhale and Dorsa Sadigh},
37
+ url={https://github.com/Stanford-ILIAD/openvla-mini},
38
+ year={2024}
39
+ }
40
+ ```
models/minivla-vq-libero90-prismatic/checkpoints/step-150000-epoch-67-loss=0.0934.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e99cfd157acca023e10b6b2d0305a2e4d480ff55a371646012404e9b32c3700b
3
+ size 5554882540
models/minivla-vq-libero90-prismatic/config.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "data_root_dir": "/hai/scratch/belkhale/datasets",
3
+ "hf_token": ".hf_token",
4
+ "image_aug": false,
5
+ "is_resume": true,
6
+ "pretrained_checkpoint": "runs/prism-qwen25-dinosiglip-224px+0_5b+mx-libero-90+n1+b32+x7--vq_extra_tokenizer/checkpoints/step-065000-epoch-29-loss=0.4407.pt",
7
+ "resume_epoch": 29,
8
+ "resume_step": 65000,
9
+ "run_id": "prism-qwen25-dinosiglip-224px+0_5b+mx-libero-90+n1+b32+x7--vq_extra_tokenizer",
10
+ "run_id_note": "vq_extra_tokenizer",
11
+ "run_root_dir": "runs",
12
+ "save_interval": 2500,
13
+ "seed": 7,
14
+ "trackers": [
15
+ "jsonl",
16
+ "wandb"
17
+ ],
18
+ "vla": {
19
+ "action_tokenizer": "libero_vq_extra_action_tokenizer",
20
+ "base_vlm": "prism-qwen25-extra-dinosiglip-224px+0_5b",
21
+ "data_mix": "libero_90",
22
+ "enable_gradient_checkpointing": true,
23
+ "enable_mixed_precision_training": true,
24
+ "epochs": 1000,
25
+ "expected_world_size": 8,
26
+ "freeze_llm_backbone": false,
27
+ "freeze_vision_backbone": false,
28
+ "global_batch_size": 256,
29
+ "learning_rate": 2e-05,
30
+ "lr_scheduler_type": "constant",
31
+ "max_grad_norm": 1.0,
32
+ "max_steps": null,
33
+ "per_device_batch_size": 32,
34
+ "reduce_in_full_precision": true,
35
+ "save_every_n_steps": 25000,
36
+ "shuffle_buffer_size": 256000,
37
+ "train_strategy": "fsdp-full-shard",
38
+ "type": "prism-qwen25-dinosiglip-224px+0_5b+mx-libero-90",
39
+ "unfreeze_last_llm_layer": false,
40
+ "vla_id": "prism-qwen25-dinosiglip-224px+0_5b+mx-libero-90",
41
+ "warmup_ratio": 0.0,
42
+ "weight_decay": 0.0
43
+ },
44
+ "wandb_entity": null,
45
+ "wandb_project": "prismatic"
46
+ }
models/minivla-vq-libero90-prismatic/config.yaml ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ data_root_dir: /hai/scratch/belkhale/datasets
2
+ hf_token: .hf_token
3
+ image_aug: false
4
+ is_resume: true
5
+ pretrained_checkpoint: runs/prism-qwen25-dinosiglip-224px+0_5b+mx-libero-90+n1+b32+x7--vq_extra_tokenizer/checkpoints/step-065000-epoch-29-loss=0.4407.pt
6
+ resume_epoch: 29
7
+ resume_step: 65000
8
+ run_id: prism-qwen25-dinosiglip-224px+0_5b+mx-libero-90+n1+b32+x7--vq_extra_tokenizer
9
+ run_id_note: vq_extra_tokenizer
10
+ run_root_dir: runs
11
+ save_interval: 2500
12
+ seed: 7
13
+ trackers:
14
+ - jsonl
15
+ - wandb
16
+ vla:
17
+ action_tokenizer: libero_vq_extra_action_tokenizer
18
+ base_vlm: prism-qwen25-extra-dinosiglip-224px+0_5b
19
+ data_mix: libero_90
20
+ enable_gradient_checkpointing: true
21
+ enable_mixed_precision_training: true
22
+ epochs: 1000
23
+ expected_world_size: 8
24
+ freeze_llm_backbone: false
25
+ freeze_vision_backbone: false
26
+ global_batch_size: 256
27
+ learning_rate: 2.0e-05
28
+ lr_scheduler_type: constant
29
+ max_grad_norm: 1.0
30
+ max_steps: null
31
+ per_device_batch_size: 32
32
+ reduce_in_full_precision: true
33
+ save_every_n_steps: 25000
34
+ shuffle_buffer_size: 256000
35
+ train_strategy: fsdp-full-shard
36
+ type: prism-qwen25-dinosiglip-224px+0_5b+mx-libero-90
37
+ unfreeze_last_llm_layer: false
38
+ vla_id: prism-qwen25-dinosiglip-224px+0_5b+mx-libero-90
39
+ warmup_ratio: 0.0
40
+ weight_decay: 0.0
41
+ wandb_entity: null
42
+ wandb_project: prismatic
models/minivla-vq-libero90-prismatic/dataset_statistics.json ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "libero_90": {
3
+ "action": {
4
+ "mean": [
5
+ 0.04548764228820801,
6
+ 0.037047673016786575,
7
+ -0.09770790487527847,
8
+ 0.005058619659394026,
9
+ 0.00224184594117105,
10
+ -0.006234025117009878,
11
+ 0.528744101524353
12
+ ],
13
+ "std": [
14
+ 0.2989887297153473,
15
+ 0.361598402261734,
16
+ 0.4069668650627136,
17
+ 0.04840222746133804,
18
+ 0.058257583528757095,
19
+ 0.08737559616565704,
20
+ 0.49890485405921936
21
+ ],
22
+ "max": [
23
+ 0.9375,
24
+ 0.9375,
25
+ 0.9375,
26
+ 0.375,
27
+ 0.375,
28
+ 0.375,
29
+ 1.0
30
+ ],
31
+ "min": [
32
+ -0.9375,
33
+ -0.9375,
34
+ -0.9375,
35
+ -0.3257142901420593,
36
+ -0.375,
37
+ -0.375,
38
+ 0.0
39
+ ],
40
+ "q01": [
41
+ -0.6321428418159485,
42
+ -0.8732143044471741,
43
+ -0.8973214030265808,
44
+ -0.12321428209543228,
45
+ -0.15642857551574707,
46
+ -0.2807142734527588,
47
+ 0.0
48
+ ],
49
+ "q99": [
50
+ 0.8517857193946838,
51
+ 0.8464285731315613,
52
+ 0.9375,
53
+ 0.1875,
54
+ 0.1778571456670761,
55
+ 0.34928572177886963,
56
+ 1.0
57
+ ],
58
+ "mask": [
59
+ true,
60
+ true,
61
+ true,
62
+ true,
63
+ true,
64
+ true,
65
+ false
66
+ ]
67
+ },
68
+ "proprio": {
69
+ "mean": [
70
+ 0.0,
71
+ 0.0,
72
+ 0.0,
73
+ 0.0,
74
+ 0.0,
75
+ 0.0,
76
+ 0.0
77
+ ],
78
+ "std": [
79
+ 0.0,
80
+ 0.0,
81
+ 0.0,
82
+ 0.0,
83
+ 0.0,
84
+ 0.0,
85
+ 0.0
86
+ ],
87
+ "max": [
88
+ 0.0,
89
+ 0.0,
90
+ 0.0,
91
+ 0.0,
92
+ 0.0,
93
+ 0.0,
94
+ 0.0
95
+ ],
96
+ "min": [
97
+ 0.0,
98
+ 0.0,
99
+ 0.0,
100
+ 0.0,
101
+ 0.0,
102
+ 0.0,
103
+ 0.0
104
+ ],
105
+ "q01": [
106
+ 0.0,
107
+ 0.0,
108
+ 0.0,
109
+ 0.0,
110
+ 0.0,
111
+ 0.0,
112
+ 0.0
113
+ ],
114
+ "q99": [
115
+ 0.0,
116
+ 0.0,
117
+ 0.0,
118
+ 0.0,
119
+ 0.0,
120
+ 0.0,
121
+ 0.0
122
+ ]
123
+ },
124
+ "num_transitions": 569230,
125
+ "num_trajectories": 3924
126
+ }
127
+ }
models/minivla-vq-libero90-prismatic/run-metrics.jsonl ADDED
@@ -0,0 +1 @@
 
 
1
+ {"hparams": {"data_root_dir": "/hai/scratch/belkhale/datasets", "hf_token": ".hf_token", "image_aug": false, "is_resume": true, "pretrained_checkpoint": "runs/prism-qwen25-dinosiglip-224px+0_5b+mx-libero-90+n1+b32+x7--vq_extra_tokenizer/checkpoints/step-065000-epoch-29-loss=0.4407.pt", "resume_epoch": 29, "resume_step": 65000, "run_id": "prism-qwen25-dinosiglip-224px+0_5b+mx-libero-90+n1+b32+x7--vq_extra_tokenizer", "run_id_note": "vq_extra_tokenizer", "run_root_dir": "runs", "save_interval": 2500, "seed": 7, "trackers": ["jsonl", "wandb"], "vla": {"action_tokenizer": "libero_vq_extra_action_tokenizer", "base_vlm": "prism-qwen25-extra-dinosiglip-224px+0_5b", "data_mix": "libero_90", "enable_gradient_checkpointing": true, "enable_mixed_precision_training": true, "epochs": 1000, "expected_world_size": 8, "freeze_llm_backbone": false, "freeze_vision_backbone": false, "global_batch_size": 256, "learning_rate": 2e-05, "lr_scheduler_type": "constant", "max_grad_norm": 1.0, "max_steps": null, "per_device_batch_size": 32, "reduce_in_full_precision": true, "save_every_n_steps": 25000, "shuffle_buffer_size": 256000, "train_strategy": "fsdp-full-shard", "type": "prism-qwen25-dinosiglip-224px+0_5b+mx-libero-90", "unfreeze_last_llm_layer": false, "vla_id": "prism-qwen25-dinosiglip-224px+0_5b+mx-libero-90", "warmup_ratio": 0.0, "weight_decay": 0.0}, "wandb_entity": null, "wandb_project": "prismatic"}, "run_id": "prism-qwen25-dinosiglip-224px+0_5b+mx-libero-90+n1+b32+x7--vq_extra_tokenizer"}