# File size: 2,588 Bytes
# Commit references: 11554c5, 5eea811, 0afe03d
# Debug flag, off in this config. Spelled as a lowercase boolean to match
# the other flags in this file (e.g. `compile: false`).
debug: false
# NOTE(review): presumably the optimizer learning rate - confirm.
# Written with an explicit decimal point in the mantissa: YAML 1.1 loaders
# such as PyYAML only resolve exponent notation as a float when the mantissa
# contains a '.', so the bare form `5e-5` would be loaded as a string there.
lr: 5.0e-5
# Backbone network definition. The short per-level lists are written as
# inline sequences; the parsed values are unchanged.
backbone:
  name: u_vit3d_pose
  # channels, block_types, block_dropouts and use_checkpointing each carry
  # four entries.
  channels: [128, 256, 576, 1152]
  emb_channels: 1024
  patch_size: 2
  block_types: [ResBlock, ResBlock, TransformerBlock, TransformerBlock]
  block_dropouts: [0.0, 0.0, 0.1, 0.1]
  # Three entries here versus four in the lists above.
  # NOTE(review): presumably one per transition between levels - confirm.
  num_updown_blocks: [3, 3, 6]
  num_mid_blocks: 20
  num_heads: 9
  pos_emb_type: rope
  # Only the last of the four entries is enabled.
  use_checkpointing: [false, false, false, true]
  conditioning:
    dim: null
  external_cond_dropout: 0.1
  use_fourier_noise_embedding: true
# Three-element shape of one input sample.
# NOTE(review): presumably (channels, height, width) - confirm.
x_shape: [3, 256, 256]
# max_frames and n_frames are both set to 8 in this config.
max_frames: 8
n_frames: 8
frame_skip: 1
# NOTE(review): presumably the number of leading frames given as context -
# confirm against the consumer.
context_frames: 1
# Latent-space settings; switched off in this config.
latent:
  # Lowercase boolean, matching the spelling used by the other flags in
  # this file (e.g. `compile: false`).
  enable: false
  type: pre_sample
  suffix: null
  # Two-element list.
  # NOTE(review): presumably (temporal, spatial) factors - confirm.
  downsampling_factor: [1, 8]
  num_channels: 4
# Three values each, every value wrapped in two singleton lists, i.e. a
# nested 3 x 1 x 1 structure.
# NOTE(review): presumably per-channel normalization statistics lining up
# with the leading 3 of x_shape - confirm.
data_mean: [[[0.577]], [[0.517]], [[0.461]]]
data_std: [[[0.249]], [[0.249]], [[0.268]]]
# External-conditioning settings.
external_cond_dim: 16
# Lowercase boolean, matching the spelling used by the other flags in this
# file (e.g. `compile: false`).
external_cond_stack: false
# No processing step is configured.
external_cond_processing: null
compile: false
weight_decay: 0.01
# Two-element pair, written as an inline sequence.
# NOTE(review): presumably the optimizer's (beta1, beta2) - confirm.
optimizer_beta: [0.9, 0.99]
# Learning-rate scheduler selection together with its step counts.
lr_scheduler:
  name: constant_with_warmup
  num_warmup_steps: 10000
  # NOTE(review): confirm whether the selected scheduler actually consumes
  # this value or only the warmup count.
  num_training_steps: 550000
noise_level: random_independent
# uniform_future, fixed_context and variable_context are all disabled in
# this config; their remaining fields are left at null / 0.
uniform_future:
  enabled: false
fixed_context:
  enabled: false
  indices: null
  dropout: 0
variable_context:
  enabled: false
  prob: 0
  dropout: 0
# NOTE(review): -1 looks like a "no chunking" sentinel - confirm against the
# consumer.
chunk_size: -1
scheduling_matrix: full_sequence
replacement: noisy_scale
# Diffusion process settings: schedule, objective, loss weighting and
# sampling parameters.
diffusion:
  is_continuous: true
  timesteps: 1000
  beta_schedule: cosine_simple_diffusion
  # NOTE(review): both `shift` and `shifted` are present with different
  # values - confirm that the schedule function reads both keys.
  schedule_fn_kwargs:
    shift: 1.0
    shifted: 0.125
    interpolated: false
  use_causal_mask: false
  clip_noise: 20.0
  objective: pred_v
  loss_weighting:
    strategy: sigmoid
    snr_clip: 5.0
    cum_snr_decay: 0.9
    sigmoid_bias: -1.0
  sampling_timesteps: 50
  ddim_sampling_eta: 0.0
  reconstruction_guidance: 0.0
  training_schedule:
    name: cosine
    shift: 0.125
  precond_scale: 0.125
# VAE settings. No pretrained path is set and pretrained_kwargs is an empty
# mapping.
vae:
  pretrained_path: null
  pretrained_kwargs: {}
  use_fp16: true
  batch_size: 2
# Checkpoint handling flags.
# NOTE(review): presumably applied when restoring a checkpoint - confirm
# against the loading code.
checkpoint:
  reset_optimizer: false
  strict: true
# Per-task settings. `prediction` is enabled and `interpolation` is disabled
# in this config.
tasks:
  prediction:
    enabled: true
    history_guidance:
      name: stabilized_vanilla
      guidance_scale: 4.0
      stabilization_level: 0.02
      # Lowercase boolean, matching the spelling used by the other flags in
      # this file (e.g. `enabled: true` above).
      visualize: false
    keyframe_density: null
    sliding_context_len: null
  interpolation:
    enabled: false
    history_guidance:
      name: vanilla
      guidance_scale: 1
      # Lowercase boolean, for the same reason as in `prediction`.
      visualize: false
    max_batch_size: 4
# Logging and metrics settings. The metrics list is empty in this config.
logging:
  deterministic: null
  # loss_freq and grad_norm_freq share the same value.
  # NOTE(review): presumably measured in training steps - confirm.
  loss_freq: 100
  grad_norm_freq: 100
  max_num_videos: 256
  n_metrics_frames: null
  metrics: []
  metrics_batch_size: 16
  sanity_generation: false
  raw_dir: null
# Camera-pose conditioning settings.
camera_pose_conditioning:
  # NOTE(review): presumably normalizes poses relative to the first frame -
  # confirm against the consumer.
  normalize_by: first
  bound: null
  type: ray_encoding