YuxueYang
committed on
Commit
·
0a92d82
1
Parent(s):
fd90f34
Upload safetensor
Browse files
- .gitattributes +1 -0
- README.md +26 -0
- demos.gif +3 -0
- image_projector/config.json +14 -0
- image_projector/diffusion_pytorch_model.safetensors +3 -0
- layer_controlnet/config.json +49 -0
- layer_controlnet/diffusion_pytorch_model.safetensors +3 -0
- scheduler/scheduler_config.json +19 -0
- unet/config.json +43 -0
- unet/diffusion_pytorch_model.safetensors +3 -0
- vae/config.json +25 -0
- vae/diffusion_pytorch_model.safetensors +3 -0
- vae_dualref/config.json +25 -0
- vae_dualref/diffusion_pytorch_model.safetensors +3 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
*.gif filter=lfs diff=lfs merge=lfs -text
|
README.md
CHANGED
@@ -1,3 +1,29 @@
|
|
1 |
---
|
2 |
license: mit
|
|
|
|
|
|
|
|
|
|
|
3 |
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
---
|
2 |
license: mit
|
3 |
+
tags:
|
4 |
+
- video generation
|
5 |
+
- CreateAI
|
6 |
+
pipeline_tag: image-to-video
|
7 |
+
library_name: diffusers
|
8 |
---
|
9 |
+
|
10 |
+
# LayerAnimate-Mix
|
11 |
+
|
12 |
+
[Project](https://layeranimate.github.io) | [Github](https://github.com/IamCreateAI/LayerAnimate) | [Paper](https://arxiv.org/abs/2501.08295)
|
13 |
+
|
14 |
+
<div align="center"><img src="demos.gif" alt="LayerAnimate demos"></div>
|
15 |
+
|
16 |
+
**LayerAnimate** is a novel video diffusion framework with layer-aware architecture that empowers the manipulation of layers through layer-level controls.
|
17 |
+
|
18 |
+
## Citation
|
19 |
+
|
20 |
+
Please consider citing our work as follows if it is helpful.
|
21 |
+
|
22 |
+
```bib
|
23 |
+
@article{yang2025layeranimate,
|
24 |
+
author = {Yang, Yuxue and Fan, Lue and Lin, Zuzeng and Wang, Feng and Zhang, Zhaoxiang},
|
25 |
+
title = {LayerAnimate: Layer-specific Control for Animation},
|
26 |
+
journal = {arXiv preprint arXiv:2501.08295},
|
27 |
+
year = {2025},
|
28 |
+
}
|
29 |
+
```
|
demos.gif
ADDED
![]() |
Git LFS Details
|
image_projector/config.json
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_class_name": "Resampler",
|
3 |
+
"_diffusers_version": "0.30.2",
|
4 |
+
"_name_or_path": "models/i2v",
|
5 |
+
"depth": 4,
|
6 |
+
"dim": 1024,
|
7 |
+
"dim_head": 64,
|
8 |
+
"embedding_dim": 1280,
|
9 |
+
"ff_mult": 4,
|
10 |
+
"heads": 12,
|
11 |
+
"num_queries": 16,
|
12 |
+
"output_dim": 1024,
|
13 |
+
"video_length": 16
|
14 |
+
}
|
image_projector/diffusion_pytorch_model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:893a58eb98b1b2ed33ccd635ebf8ddad825ff20d0959b582964fd03cc8e37e30
|
3 |
+
size 97579608
|
layer_controlnet/config.json
ADDED
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_class_name": "LayerControlNet",
|
3 |
+
"_diffusers_version": "0.30.2",
|
4 |
+
"addition_attention": true,
|
5 |
+
"attention_resolutions": [
|
6 |
+
4,
|
7 |
+
2,
|
8 |
+
1
|
9 |
+
],
|
10 |
+
"channel_mult": [
|
11 |
+
1,
|
12 |
+
2,
|
13 |
+
4
|
14 |
+
],
|
15 |
+
"condition_channels": {
|
16 |
+
"motion_score": 2,
|
17 |
+
"sketch": 4,
|
18 |
+
"trajectory": 3
|
19 |
+
},
|
20 |
+
"context_dim": 1024,
|
21 |
+
"control_injection_mode": "add",
|
22 |
+
"conv_resample": true,
|
23 |
+
"default_fps": 24,
|
24 |
+
"dims": 2,
|
25 |
+
"dropout": 0.1,
|
26 |
+
"fps_condition": true,
|
27 |
+
"ignore_noisy_latents": true,
|
28 |
+
"image_cross_attention": true,
|
29 |
+
"image_cross_attention_scale_learnable": false,
|
30 |
+
"in_channels": 5,
|
31 |
+
"model_channels": 320,
|
32 |
+
"num_head_channels": 64,
|
33 |
+
"num_heads": -1,
|
34 |
+
"num_res_blocks": 2,
|
35 |
+
"out_channels": 4,
|
36 |
+
"resblock_updown": false,
|
37 |
+
"temporal_attention": true,
|
38 |
+
"temporal_conv": true,
|
39 |
+
"temporal_length": 16,
|
40 |
+
"temporal_selfatt_only": true,
|
41 |
+
"tempspatial_aware": false,
|
42 |
+
"transformer_depth": 1,
|
43 |
+
"use_causal_attention": false,
|
44 |
+
"use_checkpoint": true,
|
45 |
+
"use_linear": true,
|
46 |
+
"use_relative_position": false,
|
47 |
+
"use_scale_shift_norm": false,
|
48 |
+
"use_vae_for_trajectory": false
|
49 |
+
}
|
layer_controlnet/diffusion_pytorch_model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:370fda8bb902ca1fe3c58811240d608a8b8f6ff22e70efba9c1048d8fe368480
|
3 |
+
size 682530392
|
scheduler/scheduler_config.json
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_class_name": "DDIMScheduler",
|
3 |
+
"_diffusers_version": "0.30.2",
|
4 |
+
"beta_end": 0.012,
|
5 |
+
"beta_schedule": "scaled_linear",
|
6 |
+
"beta_start": 0.00085,
|
7 |
+
"clip_sample": false,
|
8 |
+
"clip_sample_range": 1.0,
|
9 |
+
"dynamic_thresholding_ratio": 0.995,
|
10 |
+
"num_train_timesteps": 1000,
|
11 |
+
"prediction_type": "v_prediction",
|
12 |
+
"rescale_betas_zero_snr": true,
|
13 |
+
"sample_max_value": 1.0,
|
14 |
+
"set_alpha_to_one": true,
|
15 |
+
"steps_offset": 1,
|
16 |
+
"thresholding": false,
|
17 |
+
"timestep_spacing": "leading",
|
18 |
+
"trained_betas": null
|
19 |
+
}
|
unet/config.json
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_class_name": "UNetModel",
|
3 |
+
"_diffusers_version": "0.30.2",
|
4 |
+
"addition_attention": true,
|
5 |
+
"attention_resolutions": [
|
6 |
+
4,
|
7 |
+
2,
|
8 |
+
1
|
9 |
+
],
|
10 |
+
"channel_mult": [
|
11 |
+
1,
|
12 |
+
2,
|
13 |
+
4,
|
14 |
+
4
|
15 |
+
],
|
16 |
+
"context_dim": 1024,
|
17 |
+
"conv_resample": true,
|
18 |
+
"default_fps": 24,
|
19 |
+
"dims": 2,
|
20 |
+
"dropout": 0.1,
|
21 |
+
"fps_condition": true,
|
22 |
+
"image_cross_attention": true,
|
23 |
+
"image_cross_attention_scale_learnable": false,
|
24 |
+
"in_channels": 8,
|
25 |
+
"masked_layer_fusion": true,
|
26 |
+
"model_channels": 320,
|
27 |
+
"num_head_channels": 64,
|
28 |
+
"num_heads": -1,
|
29 |
+
"num_res_blocks": 2,
|
30 |
+
"out_channels": 4,
|
31 |
+
"resblock_updown": false,
|
32 |
+
"temporal_attention": true,
|
33 |
+
"temporal_conv": true,
|
34 |
+
"temporal_length": 16,
|
35 |
+
"temporal_selfatt_only": true,
|
36 |
+
"tempspatial_aware": false,
|
37 |
+
"transformer_depth": 1,
|
38 |
+
"use_causal_attention": false,
|
39 |
+
"use_checkpoint": true,
|
40 |
+
"use_linear": true,
|
41 |
+
"use_relative_position": false,
|
42 |
+
"use_scale_shift_norm": false
|
43 |
+
}
|
unet/diffusion_pytorch_model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7681439cf7fb436df807b8262f06541bfcc056d293ec2e52352bf2c64c973b37
|
3 |
+
size 3001024704
|
vae/config.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_class_name": "AutoencoderKL",
|
3 |
+
"_diffusers_version": "0.30.3",
|
4 |
+
"ddconfig": {
|
5 |
+
"attn_resolutions": [],
|
6 |
+
"ch": 128,
|
7 |
+
"ch_mult": [
|
8 |
+
1,
|
9 |
+
2,
|
10 |
+
4,
|
11 |
+
4
|
12 |
+
],
|
13 |
+
"double_z": true,
|
14 |
+
"dropout": 0.0,
|
15 |
+
"in_channels": 3,
|
16 |
+
"num_res_blocks": 2,
|
17 |
+
"out_ch": 3,
|
18 |
+
"resolution": 256,
|
19 |
+
"z_channels": 4
|
20 |
+
},
|
21 |
+
"embed_dim": 4,
|
22 |
+
"image_key": "image",
|
23 |
+
"input_dim": 4,
|
24 |
+
"use_checkpoint": false
|
25 |
+
}
|
vae/diffusion_pytorch_model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7053f0e04f33a165d6c20c27727eb7238676ffdc290ca0cb924acdc080b89ae3
|
3 |
+
size 334641012
|
vae_dualref/config.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_class_name": "AutoencoderKL_Dualref",
|
3 |
+
"_diffusers_version": "0.11.1",
|
4 |
+
"ddconfig": {
|
5 |
+
"attn_resolutions": [],
|
6 |
+
"ch": 128,
|
7 |
+
"ch_mult": [
|
8 |
+
1,
|
9 |
+
2,
|
10 |
+
4,
|
11 |
+
4
|
12 |
+
],
|
13 |
+
"double_z": true,
|
14 |
+
"dropout": 0.0,
|
15 |
+
"in_channels": 3,
|
16 |
+
"num_res_blocks": 2,
|
17 |
+
"out_ch": 3,
|
18 |
+
"resolution": 256,
|
19 |
+
"z_channels": 4
|
20 |
+
},
|
21 |
+
"embed_dim": 4,
|
22 |
+
"image_key": "image",
|
23 |
+
"input_dim": 4,
|
24 |
+
"use_checkpoint": false
|
25 |
+
}
|
vae_dualref/diffusion_pytorch_model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ec01046000da70640a5f146bc18e9438181ec3ee9cd27c71ab2da3ca9ea8bdde
|
3 |
+
size 399810404
|