Commit bd5ec3d by DeepBeepMeep (parent: 72e5204)
AccVideo support

Files changed:
- wan/text2video.py (+4 -4)
- wgp.py (+7 -5)
wan/text2video.py
@@ -470,14 +470,14 @@ class WanT2V:
                         latent_noise_factor = t / 1000
                         for zz, zz_r, ll in zip(z, z_reactive, [latents]):
                             pass
-
-
+                            zz[0:16, ref_images_count:overlapped_latents_size + ref_images_count] = zz_r[:, ref_images_count:] * (1.0 - overlap_noise_factor) + torch.randn_like(zz_r[:, ref_images_count:]) * overlap_noise_factor
+                            ll[:, 0:overlapped_latents_size + ref_images_count] = zz_r * (1.0 - latent_noise_factor) + torch.randn_like(zz_r) * latent_noise_factor

                     if conditioning_latents_size > 0 and overlap_noise > 0:
                         pass
                         overlap_noise_factor = overlap_noise / 1000
-                        latents[:, conditioning_latents_size + ref_images_count:] = latents[:, conditioning_latents_size + ref_images_count:] * (1.0 - overlap_noise_factor) + torch.randn_like(latents[:, conditioning_latents_size + ref_images_count:]) * overlap_noise_factor
-                        #timestep = [torch.tensor([t.item()] * (conditioning_latents_size + ref_images_count) + [t.item() - overlap_noise]*(
+                        # latents[:, conditioning_latents_size + ref_images_count:] = latents[:, conditioning_latents_size + ref_images_count:] * (1.0 - overlap_noise_factor) + torch.randn_like(latents[:, conditioning_latents_size + ref_images_count:]) * overlap_noise_factor
+                        # timestep = [torch.tensor([t.item()] * (conditioning_latents_size + ref_images_count) + [t.item() - overlap_noise]*(target_shape[1] - conditioning_latents_size - ref_images_count))]

                     if target_camera != None:
                         latent_model_input = torch.cat([latents, source_latents], dim=1)
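The added lines blend the overlapped latent frames carried over from the previous sliding window with fresh Gaussian noise, rather than re-noising everything past the conditioning latents as the now-commented lines did. A minimal sketch of that blending rule, with hypothetical names (renoise_overlap, noise_level) and assuming latents shaped [channels, frames, height, width]:

import torch

def renoise_overlap(latents: torch.Tensor, overlap: int, noise_level: float) -> torch.Tensor:
    # Blend the first `overlap` latent frames with fresh Gaussian noise.
    # noise_level is on the 0-1000 timestep scale used above, so the mixing
    # factor mirrors `overlap_noise / 1000`.
    factor = noise_level / 1000
    out = latents.clone()
    out[:, :overlap] = out[:, :overlap] * (1.0 - factor) + torch.randn_like(out[:, :overlap]) * factor
    return out

# Example: 16-channel latents, 21 latent frames; re-noise the first 4 frames at strength 200/1000.
latents = torch.randn(16, 21, 30, 52)
latents = renoise_overlap(latents, overlap=4, noise_level=200)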
wgp.py
@@ -42,8 +42,8 @@ global_queue_ref = []
 AUTOSAVE_FILENAME = "queue.zip"
 PROMPT_VARS_MAX = 10

-target_mmgp_version = "3.4.
-WanGP_version = "5.
+target_mmgp_version = "3.4.8"
+WanGP_version = "5.41"
 prompt_enhancer_image_caption_model, prompt_enhancer_image_caption_processor, prompt_enhancer_llm_model, prompt_enhancer_llm_tokenizer = None, None, None, None

 from importlib.metadata import version
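The version bump pins mmgp 3.4.8 and tags this build as WanGP 5.41. Given the `from importlib.metadata import version` import kept in context, the constant is presumably compared against the installed mmgp package; a minimal sketch of such a check (the message and exact behaviour in wgp.py may differ):

from importlib.metadata import version

target_mmgp_version = "3.4.8"

# Compare the installed mmgp release against the pinned target and warn on mismatch.
installed = version("mmgp")
if installed != target_mmgp_version:
    print(f"mmgp {installed} is installed but {target_mmgp_version} is expected; "
          f"run: pip install mmgp=={target_mmgp_version}")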
@@ -3263,11 +3263,13 @@ def generate_video(
             if exp > 0:
                 from rife.inference import temporal_interpolation
                 if sliding_window and window_no > 1:
-                    sample = torch.cat([
+                    sample = torch.cat([previous_before_last_frame, sample], dim=1)
+                    previous_before_last_frame = sample[:, -2:-1].clone()
                     sample = temporal_interpolation( os.path.join("ckpts", "flownet.pkl"), sample, exp, device=processing_device)
                     sample = sample[:, 1:]
                 else:
                     sample = temporal_interpolation( os.path.join("ckpts", "flownet.pkl"), sample, exp, device=processing_device)
+                    previous_before_last_frame = sample[:, -2:-1].clone()

                 output_fps = output_fps * 2**exp

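For sliding-window generation, the new lines keep the before-last frame of each window in previous_before_last_frame, prepend it to the next window before RIFE interpolation so that in-between frames are also synthesised across the window boundary, and then drop that seed frame (sample[:, 1:]) so it is not emitted twice. A minimal sketch of the stitching pattern, using a midpoint-averaging stand-in for rife.inference.temporal_interpolation and dummy [C, F, H, W] tensors:

import torch

def interpolate_x2(sample: torch.Tensor) -> torch.Tensor:
    # Stand-in for RIFE x2: insert the midpoint between each pair of consecutive
    # frames along the frame axis (dim=1), so N frames become 2*N - 1 frames.
    frames = [sample[:, :1]]
    for i in range(1, sample.shape[1]):
        frames += [(sample[:, i - 1:i] + sample[:, i:i + 1]) / 2, sample[:, i:i + 1]]
    return torch.cat(frames, dim=1)

previous_before_last_frame = None
windows = [torch.randn(3, 8, 64, 64) for _ in range(3)]  # dummy [C, F, H, W] windows
for window_no, sample in enumerate(windows, start=1):
    if window_no > 1:
        # Bridge the boundary: seed this window with the frame carried over
        # from the previous one so frames are interpolated across the join.
        sample = torch.cat([previous_before_last_frame, sample], dim=1)
        previous_before_last_frame = sample[:, -2:-1].clone()
        sample = interpolate_x2(sample)
        # Drop the carried-over seed frame so it is not output twice.
        sample = sample[:, 1:]
    else:
        sample = interpolate_x2(sample)
        previous_before_last_frame = sample[:, -2:-1].clone()
    print(window_no, sample.shape)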
@@ -4843,8 +4845,8 @@ def generate_video_tab(update_form = False, state_dict = None, ui_defaults = None
                         temporal_upsampling = gr.Dropdown(
                             choices=[
                                 ("Disabled", ""),
-                                ("Rife x2
-                                ("Rife x4
+                                ("Rife x2 frames/s", "rife2"),
+                                ("Rife x4 frames/s", "rife4"),
                             ],
                             value=ui_defaults.get("temporal_upsampling", ""),
                             visible=True,
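The relabelled choices expose RIFE x2 / x4 as (label, value) pairs whose values ("rife2", "rife4") select the frame-interpolation factor applied in the block above (output_fps = output_fps * 2**exp). A minimal sketch of such a dropdown with an assumed value-to-exponent mapping; RIFE_EXP, describe, and the 16 fps base are illustrative only, and the actual parsing in wgp.py may differ:

import gradio as gr

# Hypothetical mapping from the dropdown value to the RIFE exponent:
# "rife2" doubles the frame count (exp=1), "rife4" quadruples it (exp=2).
RIFE_EXP = {"": 0, "rife2": 1, "rife4": 2}

def describe(temporal_upsampling: str, fps: float = 16.0) -> str:
    exp = RIFE_EXP.get(temporal_upsampling, 0)
    return f"output fps = {fps * 2 ** exp}"

with gr.Blocks() as demo:
    temporal_upsampling = gr.Dropdown(
        choices=[("Disabled", ""), ("Rife x2 frames/s", "rife2"), ("Rife x4 frames/s", "rife4")],
        value="",
        label="Temporal Upsampling",
    )
    out = gr.Textbox(label="Effective fps")
    temporal_upsampling.change(describe, inputs=temporal_upsampling, outputs=out)

# demo.launch()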