IliaLarchenko committed
Commit 7c0ded9 · verified · 1 Parent(s): c1e2410

Upload folder using huggingface_hub

Files changed (6)
  1. .gitattributes +1 -0
  2. README.md +38 -0
  3. config.json +50 -0
  4. config.yaml +322 -0
  5. model.safetensors +3 -0
  6. replay.mp4 +3 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+replay.mp4 filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,38 @@
+---
+library_name: lerobot
+tags:
+- model_hub_mixin
+- pytorch_model_hub_mixin
+- robotics
+- dot
+license: apache-2.0
+datasets:
+- lerobot/aloha_sim_insertion_human
+pipeline_tag: robotics
+---
+
+# Model Card for the "Decoder Only Transformer (DOT) Policy" on the ALOHA bimanual insertion task
+
+Read more about the model and implementation details in the [DOT Policy repository](https://github.com/IliaLarchenko/dot_policy).
+
+This model was trained with the [LeRobot library](https://huggingface.co/lerobot) and achieves state-of-the-art behavior-cloning results on the ALOHA bimanual insertion dataset: a 29.6% success rate vs. 21% for the previous state-of-the-art model (ACT).
+
+This result is achieved without checkpoint selection and is easy to reproduce.
+
+You can use this model by installing LeRobot from [this branch](https://github.com/IliaLarchenko/lerobot/tree/dot).
+
+To train the model:
+
+```bash
+python lerobot/scripts/train.py policy=dot_insert env=aloha env.episode_length=500
+```
+
+To evaluate the model:
+
+```bash
+python lerobot/scripts/eval.py -p IliaLarchenko/dot_bimanual_insert eval.n_episodes=1000 eval.batch_size=100 seed=1000000
+```
+
+Model size:
+- Total parameters: 14.1M
+- Trainable parameters: 2.9M
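
Because the checkpoint is published with the `pytorch_model_hub_mixin` integration (see the tags above), it can also be loaded directly from the Hub in Python. A minimal sketch, assuming the `dot` branch is installed and that it exposes a `DOTPolicy` class at the import path below (the path and class name are assumptions, not confirmed by this repository):

```python
import torch

# Assumed import path on the `dot` branch; adjust to wherever the
# branch actually defines the DOT policy class.
from lerobot.common.policies.dot.modeling_dot import DOTPolicy

# PyTorchModelHubMixin.from_pretrained downloads config.json and
# model.safetensors from the Hub and restores the weights.
policy = DOTPolicy.from_pretrained("IliaLarchenko/dot_bimanual_insert")
policy.eval()
policy.reset()  # clear any internal action queue between episodes

# Dummy observation matching the shapes in config.json below:
# one top-camera RGB frame (3x480x640) and a 14-dim joint state.
batch = {
    "observation.images.top": torch.zeros(1, 3, 480, 640),
    "observation.state": torch.zeros(1, 14),
}
with torch.no_grad():
    action = policy.select_action(batch)  # -> (1, 14) action vector
```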
config.json ADDED
@@ -0,0 +1,50 @@
+{
+  "alpha": 0.98,
+  "crop_scale": 0.8,
+  "dim_feedforward": 512,
+  "dim_model": 128,
+  "dropout": 0.1,
+  "inference_horizon": 100,
+  "input_normalization_modes": {
+    "observation.images.top": "mean_std",
+    "observation.state": "min_max"
+  },
+  "input_shapes": {
+    "observation.images.top": [
+      3,
+      480,
+      640
+    ],
+    "observation.state": [
+      14
+    ]
+  },
+  "lookback_aug": 5,
+  "lookback_obs_steps": 30,
+  "lora_rank": 20,
+  "merge_lora": true,
+  "n_decoder_layers": 8,
+  "n_heads": 8,
+  "n_obs_steps": 3,
+  "noise_decay": 0.999995,
+  "output_normalization_modes": {
+    "action": "min_max"
+  },
+  "output_shapes": {
+    "action": [
+      14
+    ]
+  },
+  "pre_norm": true,
+  "predict_every_n": 1,
+  "pretrained_backbone_weights": "ResNet18_Weights.IMAGENET1K_V1",
+  "rescale_shape": [
+    480,
+    640
+  ],
+  "return_every_n": 1,
+  "state_noise": 0.01,
+  "train_alpha": 0.99,
+  "train_horizon": 150,
+  "vision_backbone": "resnet18"
+}
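
A note on the horizon and alpha fields above: the policy is trained to predict a 150-step action chunk (`train_horizon`) and runs with a 100-step chunk at inference (`inference_horizon`), with `alpha`/`train_alpha` acting as exponential decay coefficients over the chunk. The sketch below illustrates ACT-style temporal ensembling with such a coefficient; it shows the mechanism only and is not the repository's exact implementation (in particular, the direction of the decay is an assumption):

```python
import numpy as np

def ensemble_actions(chunks: list[np.ndarray], alpha: float = 0.98) -> np.ndarray:
    """Blend overlapping action chunks into a single action for "now".

    chunks[i] is the chunk predicted i steps ago, so chunks[i][i] is its
    prediction for the current step; here it is weighted by alpha**i,
    i.e. older predictions count less (the actual sign convention in the
    DOT implementation may differ).
    """
    preds = [chunk[i] for i, chunk in enumerate(chunks) if i < len(chunk)]
    weights = np.array([alpha**i for i in range(len(preds))])
    weights /= weights.sum()
    return np.tensordot(weights, np.stack(preds), axes=1)

# Three consecutive 100-step predictions of 14-dim actions.
rng = np.random.default_rng(0)
chunks = [rng.normal(size=(100, 14)) for _ in range(3)]
print(ensemble_actions(chunks).shape)  # (14,)
```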
config.yaml ADDED
@@ -0,0 +1,322 @@
+resume: false
+device: cuda
+use_amp: true
+seed: 100000
+dataset_repo_id: lerobot/aloha_sim_insertion_human
+video_backend: pyav
+training:
+  offline_steps: 100000
+  num_workers: 12
+  batch_size: 12
+  eval_freq: 10000
+  log_freq: 1000
+  save_checkpoint: true
+  save_freq: 10000
+  online_steps: 0
+  online_rollout_n_episodes: 1
+  online_rollout_batch_size: 1
+  online_steps_between_rollouts: 1
+  online_sampling_ratio: 0.5
+  online_env_seed: null
+  online_buffer_capacity: null
+  online_buffer_seed_size: 0
+  do_online_rollout_async: false
+  image_transforms:
+    enable: false
+    max_num_transforms: 3
+    random_order: false
+    brightness:
+      weight: 1
+      min_max:
+      - 0.8
+      - 1.2
+    contrast:
+      weight: 1
+      min_max:
+      - 0.8
+      - 1.2
+    saturation:
+      weight: 1
+      min_max:
+      - 0.5
+      - 1.5
+    hue:
+      weight: 1
+      min_max:
+      - -0.05
+      - 0.05
+    sharpness:
+      weight: 1
+      min_max:
+      - 0.8
+      - 1.2
+  save_model: true
+  grad_clip_norm: 50
+  lr: 3.0e-05
+  min_lr: 1.0e-05
+  lr_cycle_steps: 100000
+  weight_decay: 1.0e-05
+  delta_timestamps:
+    observation.images.top:
+    - -0.7
+    - -0.68
+    - -0.66
+    - -0.64
+    - -0.62
+    - -0.6
+    - -0.58
+    - -0.56
+    - -0.54
+    - -0.52
+    - -0.5
+    - -0.02
+    - 0.0
+    observation.state:
+    - -0.7
+    - -0.68
+    - -0.66
+    - -0.64
+    - -0.62
+    - -0.6
+    - -0.58
+    - -0.56
+    - -0.54
+    - -0.52
+    - -0.5
+    - -0.02
+    - 0.0
+    action:
+    - -0.7
+    - -0.68
+    - -0.66
+    - -0.64
+    - -0.62
+    - -0.6
+    - -0.58
+    - -0.56
+    - -0.54
+    - -0.52
+    - -0.5
+    - -0.02
+    - 0.0
+    - 0.02
+    - 0.04
+    - 0.06
+    - 0.08
+    - 0.1
+    - 0.12
+    - 0.14
+    - 0.16
+    - 0.18
+    - 0.2
+    - 0.22
+    - 0.24
+    - 0.26
+    - 0.28
+    - 0.3
+    - 0.32
+    - 0.34
+    - 0.36
+    - 0.38
+    - 0.4
+    - 0.42
+    - 0.44
+    - 0.46
+    - 0.48
+    - 0.5
+    - 0.52
+    - 0.54
+    - 0.56
+    - 0.58
+    - 0.6
+    - 0.62
+    - 0.64
+    - 0.66
+    - 0.68
+    - 0.7
+    - 0.72
+    - 0.74
+    - 0.76
+    - 0.78
+    - 0.8
+    - 0.82
+    - 0.84
+    - 0.86
+    - 0.88
+    - 0.9
+    - 0.92
+    - 0.94
+    - 0.96
+    - 0.98
+    - 1.0
+    - 1.02
+    - 1.04
+    - 1.06
+    - 1.08
+    - 1.1
+    - 1.12
+    - 1.14
+    - 1.16
+    - 1.18
+    - 1.2
+    - 1.22
+    - 1.24
+    - 1.26
+    - 1.28
+    - 1.3
+    - 1.32
+    - 1.34
+    - 1.36
+    - 1.38
+    - 1.4
+    - 1.42
+    - 1.44
+    - 1.46
+    - 1.48
+    - 1.5
+    - 1.52
+    - 1.54
+    - 1.56
+    - 1.58
+    - 1.6
+    - 1.62
+    - 1.64
+    - 1.66
+    - 1.68
+    - 1.7
+    - 1.72
+    - 1.74
+    - 1.76
+    - 1.78
+    - 1.8
+    - 1.82
+    - 1.84
+    - 1.86
+    - 1.88
+    - 1.9
+    - 1.92
+    - 1.94
+    - 1.96
+    - 1.98
+    - 2.0
+    - 2.02
+    - 2.04
+    - 2.06
+    - 2.08
+    - 2.1
+    - 2.12
+    - 2.14
+    - 2.16
+    - 2.18
+    - 2.2
+    - 2.22
+    - 2.24
+    - 2.26
+    - 2.28
+    - 2.3
+    - 2.32
+    - 2.34
+    - 2.36
+    - 2.38
+    - 2.4
+    - 2.42
+    - 2.44
+    - 2.46
+    - 2.48
+    - 2.5
+    - 2.52
+    - 2.54
+    - 2.56
+    - 2.58
+    - 2.6
+    - 2.62
+    - 2.64
+    - 2.66
+    - 2.68
+    - 2.7
+    - 2.72
+    - 2.74
+    - 2.76
+    - 2.78
+    - 2.8
+    - 2.82
+    - 2.84
+    - 2.86
+    - 2.88
+    - 2.9
+    - 2.92
+    - 2.94
+    - 2.96
+    - 2.98
+eval:
+  n_episodes: 50
+  batch_size: 10
+  use_async_envs: false
+wandb:
+  enable: true
+  disable_artifact: false
+  project: insert
+  notes: ''
+fps: 50
+env:
+  name: aloha
+  task: AlohaInsertion-v0
+  state_dim: 14
+  action_dim: 14
+  fps: ${fps}
+  episode_length: 500
+  gym:
+    obs_type: pixels_agent_pos
+    render_mode: rgb_array
+override_dataset_stats:
+  observation.images.top:
+    mean:
+    - - - 0.485
+    - - - 0.456
+    - - - 0.406
+    std:
+    - - - 0.229
+    - - - 0.224
+    - - - 0.225
+policy:
+  name: dot
+  n_obs_steps: 3
+  train_horizon: 150
+  inference_horizon: 100
+  lookback_obs_steps: 30
+  lookback_aug: 5
+  input_shapes:
+    observation.images.top:
+    - 3
+    - 480
+    - 640
+    observation.state:
+    - ${env.state_dim}
+  output_shapes:
+    action:
+    - ${env.action_dim}
+  input_normalization_modes:
+    observation.images.top: mean_std
+    observation.state: min_max
+  output_normalization_modes:
+    action: min_max
+  vision_backbone: resnet18
+  pretrained_backbone_weights: ResNet18_Weights.IMAGENET1K_V1
+  rescale_shape:
+  - 480
+  - 640
+  lora_rank: 20
+  merge_lora: true
+  crop_scale: 0.8
+  state_noise: 0.01
+  noise_decay: 0.999995
+  pre_norm: true
+  dim_model: 128
+  n_heads: 8
+  dim_feedforward: 512
+  n_decoder_layers: 8
+  dropout: 0.1
+  alpha: 0.98
+  train_alpha: 0.99
+  predict_every_n: 1
+  return_every_n: 1
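
The long `delta_timestamps` lists above are regular grids at the dataset's 50 fps (0.02 s per frame): 11 look-back frames from -0.7 s to -0.5 s (i.e. `lookback_obs_steps` ± `lookback_aug` frames, 30 ± 5), the two most recent frames (-0.02 s and 0.0 s), and, for actions, the 150-step train horizon out to 2.98 s. A short sketch that regenerates them, useful when adapting the config to another fps or horizon (the YAML above remains the ground truth):

```python
FPS = 50
STEP = 1 / FPS  # 0.02 s between consecutive frames

def grid(start: int, end: int) -> list[float]:
    """Inclusive range of frame offsets, converted to seconds."""
    return [round(i * STEP, 2) for i in range(start, end + 1)]

# Observations: 11 look-back frames plus the two latest frames.
obs = grid(-35, -25) + grid(-1, 0)
# Actions: the same past offsets, then 0.0 .. 2.98 s (150-step horizon).
action = grid(-35, -25) + grid(-1, 149)

assert len(obs) == 13 and len(action) == 162
assert obs[0] == -0.7 and action[-1] == 2.98
```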
model.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:706683a6b1c1c69f0b5cc577c9dcf08a8761ff30b1b25ab3511f7a0ab050ae5e
+size 56555664
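
The model weights are stored as a Git LFS pointer (as is replay.mp4 below); the `oid` is the SHA-256 of the actual file content, so a downloaded checkpoint can be verified against the pointer. A small sketch, run from the repository root after `git lfs pull`:

```python
import hashlib
from pathlib import Path

EXPECTED_OID = "706683a6b1c1c69f0b5cc577c9dcf08a8761ff30b1b25ab3511f7a0ab050ae5e"
EXPECTED_SIZE = 56555664

data = Path("model.safetensors").read_bytes()
assert len(data) == EXPECTED_SIZE, "size does not match the LFS pointer"
assert hashlib.sha256(data).hexdigest() == EXPECTED_OID, "oid mismatch"
print("model.safetensors matches its LFS pointer")
```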
replay.mp4 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ff0d0e1c523e870ff2a57f9cd4823d07335973367c2e9e0ee71913b5894234e9
+size 202117