# Spaces: Running on Zero
# File size: 2,588 Bytes
# 11554c5 5eea811 11554c5 0afe03d 11554c5
# Model / training configuration.
#
# The indentation of this file had been lost (every key sat at column 0),
# which turned section keys such as `backbone` and `diffusion` into nulls and
# produced duplicate top-level keys (`name`, `enabled`, `dropout`, `shift`,
# `history_guidance`, `visualize`, ...). The nesting below is reconstructed
# from the key names and their grouping.
# NOTE(review): confirm the reconstructed nesting against the code that
# consumes this config; uncertain placements are flagged individually.

debug: false
# Explicit mantissa dot so YAML 1.1 loaders read a float, not the string
# "5e-5".
lr: 5.0e-5

backbone:
  name: u_vit3d_pose
  # The four-entry lists below (`channels`, `block_types`, `block_dropouts`,
  # `use_checkpointing`) line up index by index.
  channels:
    - 128
    - 256
    - 576
    - 1152
  emb_channels: 1024
  patch_size: 2
  block_types:
    - ResBlock
    - ResBlock
    - TransformerBlock
    - TransformerBlock
  block_dropouts:
    - 0.0
    - 0.0
    - 0.1
    - 0.1
  # Three entries, one fewer than `channels`.
  num_updown_blocks:
    - 3
    - 3
    - 6
  num_mid_blocks: 20
  num_heads: 9
  pos_emb_type: rope
  use_checkpointing:
    - false
    - false
    - false
    - true
  conditioning:
    dim: null
  # NOTE(review): the next two keys are placed at backbone level, as siblings
  # of `conditioning`; they might instead belong inside `conditioning`.
  external_cond_dropout: 0.1
  use_fourier_noise_embedding: true

x_shape: [3, 256, 256]
max_frames: 8
n_frames: 8
frame_skip: 1
context_frames: 1

latent:
  enable: false
  type: pre_sample
  suffix: null
  downsampling_factor: [1, 8]
  num_channels: 4

# Three nested 1x1 entries each, matching the leading 3 in `x_shape`.
data_mean: [[[0.577]], [[0.517]], [[0.461]]]
data_std: [[[0.249]], [[0.249]], [[0.268]]]

external_cond_dim: 16
external_cond_stack: false
external_cond_processing: null
compile: false

weight_decay: 0.01
optimizer_beta:
  - 0.9
  - 0.99
lr_scheduler:
  name: constant_with_warmup
  num_warmup_steps: 10000
  num_training_steps: 550000

noise_level: random_independent
uniform_future:
  enabled: false
fixed_context:
  enabled: false
  indices: null
  dropout: 0
variable_context:
  enabled: false
  prob: 0
  dropout: 0
chunk_size: -1
scheduling_matrix: full_sequence
replacement: noisy_scale

diffusion:
  is_continuous: true
  timesteps: 1000
  beta_schedule: cosine_simple_diffusion
  schedule_fn_kwargs:
    shift: 1.0
    shifted: 0.125
    interpolated: false
  use_causal_mask: false
  clip_noise: 20.0
  objective: pred_v
  loss_weighting:
    strategy: sigmoid
    snr_clip: 5.0
    cum_snr_decay: 0.9
    sigmoid_bias: -1.0
  sampling_timesteps: 50
  ddim_sampling_eta: 0.0
  reconstruction_guidance: 0.0
  training_schedule:
    name: cosine
    shift: 0.125
  # NOTE(review): assumed to be a `diffusion` key rather than part of
  # `training_schedule` or a top-level key.
  precond_scale: 0.125

vae:
  pretrained_path: null
  pretrained_kwargs: {}
  use_fp16: true
  # NOTE(review): assumed to be the VAE's own batch size, not a top-level key.
  batch_size: 2

checkpoint:
  reset_optimizer: false
  strict: true

tasks:
  prediction:
    enabled: true
    history_guidance:
      name: stabilized_vanilla
      guidance_scale: 4.0
      stabilization_level: 0.02
      # NOTE(review): `visualize` is assumed to belong to `history_guidance`
      # (it follows the guidance keys in both tasks).
      visualize: false
    keyframe_density: null
    sliding_context_len: null
  interpolation:
    enabled: false
    history_guidance:
      name: vanilla
      guidance_scale: 1
      visualize: false
    # NOTE(review): assumed to be an `interpolation` key.
    max_batch_size: 4

logging:
  deterministic: null
  loss_freq: 100
  grad_norm_freq: 100
  max_num_videos: 256
  n_metrics_frames: null
  metrics: []
  metrics_batch_size: 16
  sanity_generation: false
  raw_dir: null

camera_pose_conditioning:
  normalize_by: first
  bound: null
  type: ray_encoding