# Spaces: Running on Zero
# Model and diffusion/sampling configuration for HunyuanVideo-Foley-XXL.
#
# NOTE(review): the nesting below was reconstructed from a copy whose
# indentation had been lost. It assumes `model_kwargs` is a child of
# `model_config` and that `diffusion_config` is a separate top-level
# section -- confirm against the code that loads this file.

model_config:
  model_name: HunyuanVideo-Foley-XXL
  model_type: 1d
  model_precision: bf16
  model_kwargs:
    # Block counts and widths
    depth_triple_blocks: 18
    depth_single_blocks: 36
    hidden_size: 1536
    num_heads: 12
    mlp_ratio: 4
    mlp_act_type: "gelu_tanh"

    # Attention options
    qkv_bias: True
    qk_norm: True
    qk_norm_type: "rms"
    attn_mode: "torch"

    # Conditioning / feature-fusion switches
    embedder_type: "default"
    interleaved_audio_visual_rope: True
    enable_learnable_empty_visual_feat: True
    sync_modulation: False
    add_sync_feat_to_audio: True
    cross_attention: True
    use_attention_mask: False
    condition_projection: "linear"

    # Input feature dimensions
    sync_feat_dim: 768  # syncformer 768 dim
    condition_dim: 768  # clap 768 text condition dim (clip-text)
    clip_dim: 768  # siglip2 visual dim
    audio_vae_latent_dim: 128
    audio_frame_rate: 50
    patch_size: 1

    # Rotary position embedding settings
    rope_dim_list: null
    rope_theta: 10000

    # Sequence lengths per conditioning stream
    text_length: 77
    clip_length: 64
    sync_length: 192

    use_mmaudio_singleblock: True
    depth_triple_ssl_encoder: null
    depth_single_ssl_encoder: 8
    use_repa_with_audiossl: True

diffusion_config:
  denoise_type: "flow"
  flow_path_type: "linear"
  flow_predict_type: "velocity"
  flow_reverse: True
  flow_solver: "euler"
  sample_flow_shift: 1.0
  sample_use_flux_shift: False
  flux_base_shift: 0.5
  flux_max_shift: 1.15