DeepBeepMeep committed · commit 896a1b4 · 1 parent: f0621e1

added multiple ways to extract settings from past video generations
Files changed:
- README.md +4 -0
- ltx_video/pipelines/pipeline_ltx_video.py +1 -1
- wan/text2video.py +19 -6
- wgp.py +219 -44
README.md
CHANGED
@@ -21,6 +21,10 @@ WanGP supports the Wan (and derived models), Hunyuan Video and LTV Video models
 
 
 ## 🔥 Latest News!!
+* May 26 2025: 👋 Wan 2.1GP v5.3: Happy with a video generation and want to run more generations with the same settings, but you can't remember what you used, or copying each setting one by one from the file metadata feels too tedious? Rejoice! There are now multiple ways to turn this chore into a one-click task:
+    - Select a recently generated video in the video gallery and click *Use Selected Video Settings*
+    - Click *Drop File Here* and select a video you saved earlier; if the settings metadata was saved with the video, it will be extracted automatically
+    - Click *Export Settings to File* to save the current settings to your hard drive. You can reuse them later by clicking *Drop File Here* and selecting the settings JSON file
 * May 23 2025: 👋 Wan 2.1GP v5.21: Improvements for Vace: better transitions between Sliding Windows, Support for Image masks in Matanyone, new Extend Video for Vace, different types of automated background removal
 * May 20 2025: 👋 Wan 2.1GP v5.2: Added support for Wan CausVid which is a distilled Wan model that can generate nice looking videos in only 4 to 12 steps.
 The great thing is that Kijai (Kudos to him !) has created a CausVid Lora that can be combined with any existing Wan t2v model 14B like Wan Vace 14B.
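For reference, the one-click settings extraction announced in the README entry above relies on the generation settings being embedded in the saved MP4. Below is a minimal, stand-alone sketch of reading them back outside the UI, assuming the mutagen package and the comment-tag layout used by load_settings_from_file in the wgp.py changes further down; the helper name read_video_settings and the file name are illustrative only.

# Minimal sketch: recover the generation settings WanGP stores in a saved MP4.
# Assumes the mutagen package; mirrors load_settings_from_file later in this commit.
import json
from mutagen.mp4 import MP4

def read_video_settings(path):
    try:
        comment = MP4(path).tags['©cmt'][0]   # settings are stored as JSON in the MP4 comment tag
        return json.loads(comment)
    except Exception:
        return None                           # no settings metadata saved with this video

settings = read_video_settings("output.mp4")  # illustrative file name
if settings is not None:
    print(settings.get("prompt", ""))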
ltx_video/pipelines/pipeline_ltx_video.py
CHANGED
@@ -1113,7 +1113,7 @@ class LTXVideoPipeline(DiffusionPipeline):
                 )
 
                 latent_model_input = (
-                    …
+                    torch.cat([latents] * num_conds) if num_conds > 1 else latents
                 )
                 latent_model_input = self.scheduler.scale_model_input(
                     latent_model_input, t
wan/text2video.py
CHANGED
@@ -124,7 +124,7 @@ class WanT2V:
         reactive = [i * m + 0 * (1 - m) for i, m in zip(frames, masks)]
         inactive = self.vae.encode(inactive, tile_size = tile_size)
         self.toto = inactive[0].clone()
-        if overlapped_latents != None
+        if overlapped_latents != None :
             # inactive[0][:, 0:1] = self.vae.encode([frames[0][:, 0:1]], tile_size = tile_size)[0] # redundant
             inactive[0][:, 1:overlapped_latents.shape[1] + 1] = overlapped_latents
 
@@ -303,6 +303,7 @@ class WanT2V:
         overlapped_latents = None,
         return_latent_slice = None,
         overlap_noise = 0,
+        conditioning_latents_size = 0,
         model_filename = None,
         **bbargs
     ):
@@ -445,8 +446,9 @@ class WanT2V:
         if vace:
             ref_images_count = len(input_ref_images[0]) if input_ref_images != None and input_ref_images[0] != None else 0
             kwargs.update({'vace_context' : z, 'vace_context_scale' : context_scale})
-            if overlapped_latents != None:
+            if overlapped_latents != None :
                 overlapped_latents_size = overlapped_latents.shape[1] + 1
+                # overlapped_latents_size = 3
                 z_reactive = [ zz[0:16, 0:overlapped_latents_size + ref_images_count].clone() for zz in z]
 
 
@@ -456,22 +458,33 @@ class WanT2V:
             self.model.compute_teacache_threshold(self.model.teacache_start_step, timesteps, self.model.teacache_multiplier)
         if callback != None:
             callback(-1, None, True)
+        prev = 50/1000
         for i, t in enumerate(tqdm(timesteps)):
-            …
+
+            timestep = [t]
+            if overlapped_latents != None :
                 # overlap_noise_factor = overlap_noise *(i/(len(timesteps)-1)) / 1000
                 overlap_noise_factor = overlap_noise / 1000
+                # overlap_noise_factor = (1000-t )/ 1000 # overlap_noise / 1000
+                # latent_noise_factor = 1 #max(min(1, (t - overlap_noise) / 1000 ),0)
                 latent_noise_factor = t / 1000
                 for zz, zz_r, ll in zip(z, z_reactive, [latents]):
                     pass
-                    zz[0:16, ref_images_count:overlapped_latents_size + ref_images_count] = zz_r[:, ref_images_count:] * (1.0 - overlap_noise_factor) + torch.randn_like(zz_r[:, ref_images_count:] ) * overlap_noise_factor
-                    ll[:, 0:overlapped_latents_size + ref_images_count] = zz_r * (1.0 - latent_noise_factor) + torch.randn_like(zz_r ) * latent_noise_factor
+                    # zz[0:16, ref_images_count:overlapped_latents_size + ref_images_count] = zz_r[:, ref_images_count:] * (1.0 - overlap_noise_factor) + torch.randn_like(zz_r[:, ref_images_count:] ) * overlap_noise_factor
+                    # ll[:, 0:overlapped_latents_size + ref_images_count] = zz_r * (1.0 - latent_noise_factor) + torch.randn_like(zz_r ) * latent_noise_factor
+
+            if conditioning_latents_size > 0 and overlap_noise > 0:
+                pass
+                overlap_noise_factor = overlap_noise / 1000
+                latents[:, conditioning_latents_size + ref_images_count:] = latents[:, conditioning_latents_size + ref_images_count:] * (1.0 - overlap_noise_factor) + torch.randn_like(latents[:, conditioning_latents_size + ref_images_count:]) * overlap_noise_factor
+                timestep = [torch.tensor([t.item()] * (conditioning_latents_size + ref_images_count) + [t.item() - overlap_noise]*(len(timesteps) - conditioning_latents_size - ref_images_count))]
+
             if target_camera != None:
                 latent_model_input = torch.cat([latents, source_latents], dim=1)
             else:
                 latent_model_input = latents
             kwargs["slg_layers"] = slg_layers if int(slg_start * sampling_steps) <= i < int(slg_end * sampling_steps) else None
 
-            timestep = [t]
             offload.set_step_no_for_lora(self.model, i)
             timestep = torch.stack(timestep)
             kwargs["current_step"] = i
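The new conditioning_latents_size branch above blends every latent frame that follows the conditioning and reference frames with a small amount of Gaussian noise, scaled by overlap_noise / 1000, before each denoising step. Here is a stand-alone sketch of just that blending; the helper name inject_overlap_noise is illustrative, the variable names mirror the diff, and this is not the WanT2V code itself.

# Illustrative sketch of the noise blending introduced above (not the actual WanT2V method).
import torch

def inject_overlap_noise(latents, conditioning_latents_size, ref_images_count, overlap_noise):
    # Mix the frames that come after the conditioning/reference latents with fresh noise.
    factor = overlap_noise / 1000   # same scaling as in the diff
    tail = latents[:, conditioning_latents_size + ref_images_count:]
    latents[:, conditioning_latents_size + ref_images_count:] = tail * (1.0 - factor) + torch.randn_like(tail) * factor
    return latents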
wgp.py
CHANGED
@@ -43,6 +43,7 @@ AUTOSAVE_FILENAME = "queue.zip"
 PROMPT_VARS_MAX = 10
 
 target_mmgp_version = "3.4.7"
+WanGP_version = "5.3"
 prompt_enhancer_image_caption_model, prompt_enhancer_image_caption_processor, prompt_enhancer_llm_model, prompt_enhancer_llm_tokenizer = None, None, None, None
 
 from importlib.metadata import version
@@ -434,6 +435,7 @@ def process_prompt_and_add_tasks(state, model_choice):
                 "prompt" : single_prompt,
                 "image_start": start,
                 "image_end" : end,
+                "video_source": None,
             }
             inputs.update(extra_inputs)
             add_video_task(**inputs)
@@ -1535,7 +1537,7 @@ wan_choices_i2v=["ckpts/wan2.1_image2video_480p_14B_mbf16.safetensors", "ckpts/w
                  "ckpts/wan2.1_fantasy_speaking_14B_bf16.safetensors"]
 ltxv_choices= ["ckpts/ltxv_0.9.7_13B_dev_bf16.safetensors", "ckpts/ltxv_0.9.7_13B_dev_quanto_bf16_int8.safetensors", "ckpts/ltxv_0.9.7_13B_distilled_lora128_bf16.safetensors"]
 
-hunyuan_choices= ["ckpts/hunyuan_video_720_bf16.safetensors", "ckpts/hunyuan_video_720_quanto_int8.safetensors", "ckpts/…
+hunyuan_choices= ["ckpts/hunyuan_video_720_bf16.safetensors", "ckpts/hunyuan_video_720_quanto_int8.safetensors", "ckpts/hunyuan_video_i2v_720_bf16v2.safetensors", "ckpts/hunyuan_video_i2v_720_quanto_int8v2.safetensors",
                   "ckpts/hunyuan_video_custom_720_bf16.safetensors", "ckpts/hunyuan_video_custom_720_quanto_bf16_int8.safetensors" ]
 
 transformer_choices = wan_choices_t2v + wan_choices_i2v + ltxv_choices + hunyuan_choices
@@ -2323,13 +2325,13 @@ def apply_changes( state,
     transformer_dtype_policy = server_config["transformer_dtype_policy"]
     text_encoder_quantization = server_config["text_encoder_quantization"]
     transformer_types = server_config["transformer_types"]
-    …
-    …
+    model_filename = get_model_filename(get_model_type(state["model_filename"]), transformer_quantization, transformer_dtype_policy)
+    state["model_filename"] = model_filename
     if all(change in ["attention_mode", "vae_config", "boost", "save_path", "metadata_type", "clear_file_list", "fit_canvas"] for change in changes ):
         model_choice = gr.Dropdown()
     else:
         reload_needed = True
-        model_choice = generate_dropdown_model_list()
+        model_choice = generate_dropdown_model_list(model_filename)
 
     header = generate_header(state["model_filename"], compile=compile, attention_mode= attention_mode)
     return "<DIV ALIGN=CENTER>The new configuration has been succesfully applied</DIV>", header, model_choice, gr.Row(visible= server_config["enhancer_enabled"] == 1)
@@ -2699,6 +2701,8 @@ def generate_video(
     torch.set_grad_enabled(False)
 
     file_list = gen["file_list"]
+    file_settings_list = gen["file_settings_list"]
+
     prompt_no = gen["prompt_no"]
 
     fit_canvas = server_config.get("fit_canvas", 0)
@@ -2796,7 +2800,8 @@ def generate_video(
     seed = None if seed == -1 else seed
     # negative_prompt = "" # not applicable in the inference
     image2video = test_class_i2v(model_filename)
-    …
+    current_video_length = video_length
+    enable_RIFLEx = RIFLEx_setting == 0 and current_video_length > (6* 16) or RIFLEx_setting == 1
     # VAE Tiling
     device_mem_capacity = torch.cuda.get_device_properties(None).total_memory / 1048576
 
@@ -2849,7 +2854,7 @@ def generate_video(
     source_video = None
     target_camera = None
     if "recam" in model_filename:
-        source_video = preprocess_video("", width=width, height=height,video_in=video_source, max_frames= …
+        source_video = preprocess_video("", width=width, height=height,video_in=video_source, max_frames= current_video_length, start_frame = 0, fit_canvas= fit_canvas == 1)
         target_camera = model_mode
 
     audio_proj_split = None
@@ -2859,8 +2864,8 @@ def generate_video(
         from fantasytalking.infer import parse_audio
         import librosa
         duration = librosa.get_duration(path=audio_guide)
-        …
-        audio_proj_split, audio_context_lens = parse_audio(audio_guide, num_frames= …
+        current_video_length = min(int(fps * duration // 4) * 4 + 5, current_video_length)
+        audio_proj_split, audio_context_lens = parse_audio(audio_guide, num_frames= current_video_length, fps= fps, device= processing_device )
         audio_scale = 1.0
 
     import random
@@ -2884,19 +2889,19 @@ def generate_video(
     else:
         reuse_frames = 0
     if (diffusion_forcing or ltxv) and source_video != None:
-        …
-    sliding_window = (vace or diffusion_forcing or ltxv) and …
+        current_video_length += sliding_window_overlap
+    sliding_window = (vace or diffusion_forcing or ltxv) and current_video_length > sliding_window_size
 
     discard_last_frames = sliding_window_discard_last_frames
-    default_max_frames_to_generate = …
+    default_max_frames_to_generate = current_video_length
     if sliding_window:
-        left_after_first_window = …
+        left_after_first_window = current_video_length - sliding_window_size + discard_last_frames
         initial_total_windows= 1 + math.ceil(left_after_first_window / (sliding_window_size - discard_last_frames - reuse_frames))
-        …
+        current_video_length = sliding_window_size
     else:
         initial_total_windows = 1
 
-    first_window_video_length = …
+    first_window_video_length = current_video_length
     original_prompts = prompts.copy()
     gen["sliding_window"] = sliding_window
     while not abort:
@@ -2917,7 +2922,7 @@ def generate_video(
         window_no = 0
         extra_windows = 0
         guide_start_frame = 0
-        …
+        current_video_length = first_window_video_length
         gen["extra_windows"] = 0
         gen["total_windows"] = 1
         gen["window_no"] = 1
@@ -2967,7 +2972,7 @@ def generate_video(
                     num_frames_generated -= reuse_frames
                 if (max_frames_to_generate - prefix_video_frames_count - num_frames_generated) < latent_size:
                     break
-                …
+                current_video_length = min(sliding_window_size, ((max_frames_to_generate - num_frames_generated - prefix_video_frames_count + reuse_frames + discard_last_frames) // latent_size) * latent_size + 1 )
 
             total_windows = initial_total_windows + extra_windows
             gen["total_windows"] = total_windows
@@ -3015,11 +3020,11 @@ def generate_video(
 
                 if preprocess_type != None :
                     send_cmd("progress", progress_args)
-                    video_guide_copy = preprocess_video(preprocess_type, width=width, height=height,video_in=video_guide, max_frames= …
+                    video_guide_copy = preprocess_video(preprocess_type, width=width, height=height,video_in=video_guide, max_frames= current_video_length if window_no == 1 else current_video_length - reuse_frames, start_frame = guide_start_frame, fit_canvas = fit_canvas, target_fps = fps)
                 keep_frames_parsed, error = parse_keep_frames_video_guide(keep_frames_video_guide, max_frames_to_generate)
                 if len(error) > 0:
                     raise gr.Error(f"invalid keep frames {keep_frames_video_guide}")
-                keep_frames_parsed = keep_frames_parsed[guide_start_frame: guide_start_frame + …
+                keep_frames_parsed = keep_frames_parsed[guide_start_frame: guide_start_frame + current_video_length]
 
                 if window_no == 1:
                     image_size = (height, width) # default frame dimensions until it is set by video_src (if there is any)
@@ -3028,13 +3033,18 @@ def generate_video(
                 src_video, src_mask, src_ref_images = wan_model.prepare_source([video_guide_copy],
                                                                                [video_mask_copy ],
                                                                                [image_refs_copy],
-                                                                               …
+                                                                               current_video_length, image_size = image_size, device ="cpu",
                                                                                original_video= "O" in video_prompt_type,
                                                                                keep_frames=keep_frames_parsed,
                                                                                start_frame = guide_start_frame,
                                                                                pre_src_video = [pre_video_guide],
                                                                                fit_into_canvas = fit_canvas
                                                                                )
+            if window_no == 1:
+                conditioning_latents_size = ( (prefix_video_frames_count-1) // latent_size) + 1 if prefix_video_frames_count > 0 else 0
+            else:
+                conditioning_latents_size = ( (reuse_frames-1) // latent_size) + 1
+
             status = get_latest_status(state)
             gen["progress_status"] = status
             gen["progress_phase"] = ("Encoding Prompt", -1 )
@@ -3062,7 +3072,7 @@ def generate_video(
                     input_masks = src_mask,
                     input_video= pre_video_guide if diffusion_forcing or ltxv else source_video,
                     target_camera= target_camera,
-                    frame_num=(…
+                    frame_num=(current_video_length // latent_size)* latent_size + 1,
                     height = height,
                     width = width,
                     fit_into_canvas = fit_canvas == 1,
@@ -3092,6 +3102,7 @@ def generate_video(
                     overlapped_latents = overlapped_latents,
                     return_latent_slice= return_latent_slice,
                     overlap_noise = sliding_window_overlap_noise,
+                    conditioning_latents_size = conditioning_latents_size,
                     model_filename = model_filename,
                 )
             except Exception as e:
@@ -3160,7 +3171,7 @@ def generate_video(
                 if gen.get("extra_windows",0) > 0:
                     sliding_window = True
                 if sliding_window :
-                    guide_start_frame += …
+                    guide_start_frame += current_video_length
                     if discard_last_frames > 0:
                         sample = sample[: , :-discard_last_frames]
                         guide_start_frame -= discard_last_frames
@@ -3259,7 +3270,7 @@ def generate_video(
                     inputs.pop("task")
                     configs = prepare_inputs_dict("metadata", inputs)
                     configs["prompt"] = "\n".join(original_prompts)
-                    if prompt_enhancer_image_caption_model != None:
+                    if prompt_enhancer_image_caption_model != None and prompt_enhancer !=None and len(prompt_enhancer)>0:
                         configs["enhanced_prompt"] = "\n".join(prompts)
                     configs["generation_time"] = round(end_time-start_time)
                     metadata_choice = server_config.get("metadata_type","metadata")
@@ -3274,6 +3285,8 @@ def generate_video(
 
                     print(f"New video saved to Path: "+video_path)
                     file_list.append(video_path)
+                    file_settings_list.append(configs)
+
                     send_cmd("output")
 
             seed += 1
@@ -3520,6 +3533,7 @@ def process_tasks(state):
     gen = get_gen_info(state)
     clear_file_list = server_config.get("clear_file_list", 0)
     file_list = gen.get("file_list", [])
+    file_settings_list = gen.get("file_settings_list", [])
     if clear_file_list > 0:
         file_list_current_size = len(file_list)
         keep_file_from = max(file_list_current_size - clear_file_list, 0)
@@ -3527,11 +3541,13 @@ def process_tasks(state):
         choice = gen.get("selected",0)
         choice = max(choice- files_removed, 0)
         file_list = file_list[ keep_file_from: ]
+        file_settings_list = file_settings_list[ keep_file_from: ]
     else:
         file_list = []
         choice = 0
     gen["selected"] = choice
     gen["file_list"] = file_list
+    gen["file_settings_list"] = file_settings_list
 
     start_time = time.time()
 
@@ -4005,12 +4021,12 @@ def prepare_inputs_dict(target, inputs ):
 
     if target == "state":
         return inputs
-    unsaved_params = ["image_start", "image_end", "image_refs", "video_guide", "video_source", "video_mask", "audio_guide"…
+    unsaved_params = ["image_start", "image_end", "image_refs", "video_guide", "video_source", "video_mask", "audio_guide"]
     for k in unsaved_params:
        inputs.pop(k)
 
     model_filename = state["model_filename"]
-    inputs["type"] = "WanGP by DeepBeepMeep - " + get_model_name(model_filename)
+    inputs["type"] = f"WanGP v{WanGP_version} by DeepBeepMeep - " + get_model_name(model_filename)
 
     if target == "settings":
         return inputs
@@ -4021,10 +4037,10 @@ def prepare_inputs_dict(target, inputs ):
     if not server_config.get("enhancer_enabled", 0) == 1:
         inputs.pop("prompt_enhancer")
 
-    if not "recam" in model_filename
+    if not "recam" in model_filename and not "diffusion_forcing" in model_filename:
         inputs.pop("model_mode")
 
-    if not "Vace" in model_filename
+    if not "Vace" in model_filename and not "phantom" in model_filename and not "hunyuan_video_custom" in model_filename:
         unsaved_params = ["keep_frames_video_guide", "video_prompt_type", "remove_background_images_ref"]
         for k in unsaved_params:
             inputs.pop(k)
@@ -4035,7 +4051,7 @@ def prepare_inputs_dict(target, inputs ):
             inputs.pop(k)
 
 
-    if not "Vace" in model_filename
+    if not "Vace" in model_filename and not "diffusion_forcing" in model_filename and not "ltxv" in model_filename:
         unsaved_params = [ "sliding_window_size", "sliding_window_overlap", "sliding_window_overlap_noise", "sliding_window_discard_last_frames"]
         for k in unsaved_params:
             inputs.pop(k)
@@ -4043,6 +4059,8 @@ def prepare_inputs_dict(target, inputs ):
     if not "fantasy" in model_filename:
         inputs.pop("audio_guidance_scale")
 
+    if not "hunyuan" in model_filename:
+        inputs.pop("embedded_guidance_scale")
 
     if target == "metadata":
         inputs = {k: v for k,v in inputs.items() if v != None }
@@ -4055,7 +4073,85 @@ def get_function_arguments(func, locals):
     for k in args_names:
         kwargs[k] = locals[k]
     return kwargs
-…
+
+def export_settings(state):
+    model_filename = state["model_filename"]
+    model_type = get_model_type(model_filename)
+    settings = state[model_type]
+    settings["state"] = state
+    settings = prepare_inputs_dict("metadata", settings)
+    settings["model_filename"] = model_filename
+    text = json.dumps(settings, indent=4)
+    text_base64 = base64.b64encode(text.encode('utf8')).decode('utf-8')
+    return text_base64
+
+def use_video_settings(state, files):
+    gen = get_gen_info(state)
+    choice = gen.get("selected",-1)
+    file_list = gen.get("file_list", None)
+    if file_list !=None and choice >=0 and len(file_list)>0:
+        file_settings_list = gen["file_settings_list"]
+        configs = file_settings_list[choice]
+        model_filename = configs["model_filename"]
+        model_type = get_model_type(model_filename)
+        defaults = state.get(model_type, None)
+        defaults = get_default_settings(model_filename) if defaults == None else defaults
+        defaults.update(configs)
+        current_model_filename = state["model_filename"]
+        prompt = configs.get("prompt", "")
+        state[model_type] = defaults
+        gr.Info(f"Settings Loaded from Video with prompt '{prompt[:100]}'")
+        if model_type == get_model_type(current_model_filename):
+            return gr.update(), str(time.time())
+        else:
+            return generate_dropdown_model_list(model_filename), gr.update()
+    else:
+        gr.Info(f"No Video is Selected")
+
+    return gr.update(), gr.update()
+
+def load_settings_from_file(state, file_path):
+    gen = get_gen_info(state)
+    if file_path==None:
+        return gr.update(), gr.update(), None
+
+    configs = None
+    if file_path.endswith(".json"):
+        try:
+            with open(file_path, 'r', encoding='utf-8') as f:
+                configs = json.load(f)
+        except:
+            pass
+    else:
+        from mutagen.mp4 import MP4
+        tags = None
+        try:
+            file = MP4(file_path)
+            tags = file.tags['©cmt'][0]
+        except:
+            pass
+        if tags != None:
+            configs = json.loads(tags)
+    if configs == None:
+        gr.Info("File not supported")
+        return gr.update(), gr.update(), None
+
+    prompt = configs.get("prompt", "")
+    current_model_filename = state["model_filename"]
+    model_filename = configs["model_filename"]
+    model_type = get_model_type(model_filename)
+    defaults = state.get(model_type, None)
+    defaults = get_default_settings(model_filename) if defaults == None else defaults
+    defaults.update(configs)
+    state[model_type]= defaults
+    if tags != None:
+        gr.Info(f"Settings Loaded from Video generated with prompt '{prompt[:100]}'")
+    else:
+        gr.Info(f"Settings Loaded from Settings file with prompt '{prompt[:100]}'")
+    if model_type == get_model_type(current_model_filename):
+        return gr.update(), str(time.time()), None
+    else:
+        return generate_dropdown_model_list(model_filename), gr.update(), None
 
 def save_inputs(
     target,
@@ -4420,7 +4516,7 @@ def generate_video_tab(update_form = False, state_dict = None, ui_defaults = Non
                 if ltxv:
                     model_mode = gr.Dropdown(
                         choices=[
-                        ], …
+                        ], value=None,
                         visible= False
                     )
                 else:
@@ -4470,7 +4566,7 @@ def generate_video_tab(update_form = False, state_dict = None, ui_defaults = Non
                         columns=[3], rows=[1], object_fit="contain", height="auto", selected_index=0, interactive= True, visible="E" in image_prompt_type_value, value= ui_defaults.get("image_end", None))
 
                 video_source = gr.Video(visible=False)
-                model_mode = gr.Dropdown(visible=False)
+                model_mode = gr.Dropdown(value=None, visible=False)
                 keep_frames_video_source = gr.Text(visible=False)
 
             with gr.Column(visible= vace or phantom or hunyuan_video_custom) as video_prompt_column:
@@ -4784,14 +4880,19 @@ def generate_video_tab(update_form = False, state_dict = None, ui_defaults = Non
 
             with gr.Row():
                 save_settings_btn = gr.Button("Set Settings as Default", visible = not args.lock_config)
-                …
+                export_settings_from_file_btn = gr.Button("Export Settings to File", visible = not args.lock_config)
+                use_video_settings_btn = gr.Button("Use Selected Video Settings", visible = not args.lock_config)
+            with gr.Row():
+                settings_file = gr.File(height=41,label="Load Settings From Video / Json")
+                settings_base64_output = gr.Text(interactive= False, visible=False, value = "")
         if not update_form:
             with gr.Column():
                 gen_status = gr.Text(interactive= False, label = "Status")
                 status_trigger = gr.Text(interactive= False, visible=False)
                 output = gr.Gallery( label="Generated videos", show_label=False, elem_id="gallery" , columns=[3], rows=[1], object_fit="contain", height=450, selected_index=0, interactive= False)
                 output_trigger = gr.Text(interactive= False, visible=False)
-                …
+                refresh_form_trigger = gr.Text(interactive= False, visible=False)
+
                 generate_btn = gr.Button("Generate")
                 add_to_queue_btn = gr.Button("Add New Prompt To Queue", visible = False)
 
@@ -4891,6 +4992,8 @@ def generate_video_tab(update_form = False, state_dict = None, ui_defaults = Non
             gen["status_display"] = True
             return time.time()
 
+        start_quit_timer_js, cancel_quit_timer_js, trigger_zip_download_js, trigger_settings_download_js = get_js()
+
         status_trigger.change(refresh_status_async, inputs= [state] , outputs= [gen_status], show_progress_on= [gen_status])
 
         output_trigger.change(refresh_gallery,
@@ -4907,8 +5010,49 @@ def generate_video_tab(update_form = False, state_dict = None, ui_defaults = Non
         locals_dict = locals()
         gen_inputs = [locals_dict[k] for k in inputs_names] + [state]
         save_settings_btn.click( fn=validate_wizard_prompt, inputs =[state, wizard_prompt_activated_var, wizard_variables_var, prompt, wizard_prompt, *prompt_vars] , outputs= [prompt]).then(
-            …
+            save_inputs, inputs =[target_settings] + gen_inputs, outputs = [])
 
+        use_video_settings_btn.click(fn=validate_wizard_prompt,
+            inputs= [state, wizard_prompt_activated_var, wizard_variables_var, prompt, wizard_prompt, *prompt_vars] ,
+            outputs= [prompt]
+        ).then(fn=save_inputs,
+            inputs =[target_state] + gen_inputs,
+            outputs= None
+        ).then( fn=use_video_settings, inputs =[state, output] , outputs= [model_choice, refresh_form_trigger])
+
+        export_settings_from_file_btn.click(fn=validate_wizard_prompt,
+            inputs= [state, wizard_prompt_activated_var, wizard_variables_var, prompt, wizard_prompt, *prompt_vars] ,
+            outputs= [prompt]
+        ).then(fn=save_inputs,
+            inputs =[target_state] + gen_inputs,
+            outputs= None
+        ).then(fn=export_settings,
+            inputs =[state],
+            outputs= [settings_base64_output]
+        ).then(
+            fn=None,
+            inputs=[settings_base64_output],
+            outputs=None,
+            js=trigger_settings_download_js
+        )
+
+        settings_file.upload(fn=validate_wizard_prompt,
+            inputs= [state, wizard_prompt_activated_var, wizard_variables_var, prompt, wizard_prompt, *prompt_vars] ,
+            outputs= [prompt]
+        ).then(fn=save_inputs,
+            inputs =[target_state] + gen_inputs,
+            outputs= None
+        ).then(fn=load_settings_from_file, inputs =[state, settings_file] , outputs= [model_choice, refresh_form_trigger, settings_file])
+
+        refresh_form_trigger.change(fn= fill_inputs,
+            inputs=[state],
+            outputs=gen_inputs + extra_inputs
+        ).then(fn=validate_wizard_prompt,
+            inputs= [state, wizard_prompt_activated_var, wizard_variables_var, prompt, wizard_prompt, *prompt_vars],
+            outputs= [prompt]
+        )
 
         model_choice.change(fn=validate_wizard_prompt,
             inputs= [state, wizard_prompt_activated_var, wizard_variables_var, prompt, wizard_prompt, *prompt_vars] ,
@@ -4916,10 +5060,10 @@ def generate_video_tab(update_form = False, state_dict = None, ui_defaults = Non
         ).then(fn=save_inputs,
             inputs =[target_state] + gen_inputs,
             outputs= None
-        …
+        ).then(fn= change_model,
             inputs=[state, model_choice],
             outputs= [header]
-        …
+        ).then(fn= fill_inputs,
             inputs=[state],
             outputs=gen_inputs + extra_inputs
         ).then(fn= preload_model_when_switching,
@@ -4988,7 +5132,6 @@ def generate_video_tab(update_form = False, state_dict = None, ui_defaults = Non
         )
 
 
-        start_quit_timer_js, cancel_quit_timer_js, trigger_zip_download_js = get_timer_js()
 
         single_hidden_trigger_btn.click(
             fn=show_countdown_info_from_state,
@@ -5348,9 +5491,9 @@ def generate_info_tab():
     gr.Markdown("Please note that if your turn on compilation, the first denoising step of the first video generation will be slow due to the compilation. Therefore all your tests should be done with compilation turned off.")
 
 
-def generate_dropdown_model_list():
+def generate_dropdown_model_list(model_filename):
     dropdown_types= transformer_types if len(transformer_types) > 0 else model_types
-    current_model_type = get_model_type(…
+    current_model_type = get_model_type(model_filename)
     if current_model_type not in dropdown_types:
         dropdown_types.append(current_model_type)
     model_list = []
@@ -5385,7 +5528,7 @@ def select_tab(tab_state, evt:gr.SelectData):
     tab_state["tab_no"] = new_tab_no
     return gr.Tabs()
 
-def …
+def get_js():
     start_quit_timer_js = """
     () => {
         function findAndClickGradioButton(elemId) {
@@ -5454,9 +5597,41 @@ def get_timer_js():
            }
        }
    """
-    return start_quit_timer_js, cancel_quit_timer_js, trigger_zip_download_js
 
-…
+    trigger_settings_download_js = """
+    (base64String) => {
+        if (!base64String) {
+          console.log("No base64 settings data received, skipping download.");
+          return;
+        }
+        try {
+            const byteCharacters = atob(base64String);
+            const byteNumbers = new Array(byteCharacters.length);
+            for (let i = 0; i < byteCharacters.length; i++) {
+                byteNumbers[i] = byteCharacters.charCodeAt(i);
+            }
+            const byteArray = new Uint8Array(byteNumbers);
+            const blob = new Blob([byteArray], { type: 'application/text' });
+
+            const url = URL.createObjectURL(blob);
+            const a = document.createElement('a');
+            a.style.display = 'none';
+            a.href = url;
+            a.download = 'settings.json';
+            document.body.appendChild(a);
+            a.click();
+
+            window.URL.revokeObjectURL(url);
+            document.body.removeChild(a);
+            console.log("settings download triggered.");
+        } catch (e) {
+            console.error("Error processing base64 data or triggering download:", e);
+        }
+    }
+    """
+    return start_quit_timer_js, cancel_quit_timer_js, trigger_zip_download_js, trigger_settings_download_js
+
+def create_ui():
     global vmc_event_handler
     css = """
     #model_list{
@@ -5699,7 +5874,7 @@ def create_demo():
     theme = gr.themes.Soft(font=["Verdana"], primary_hue="sky", neutral_hue="slate", text_size="md")
 
     with gr.Blocks(css=css, theme=theme, title= "WanGP") as main:
-        gr.Markdown("<div align=center><H1>Wan<SUP>GP</SUP>…
+        gr.Markdown(f"<div align=center><H1>Wan<SUP>GP</SUP> v{WanGP_version} <FONT SIZE=4>by <I>DeepBeepMeep</I></FONT> <FONT SIZE=3>") # (<A HREF='https://github.com/deepbeepmeep/Wan2GP'>Updates</A>)</FONT SIZE=3></H1></div>")
         global model_list
 
         tab_state = gr.State({ "tab_no":0 })
@@ -5712,7 +5887,7 @@ def create_demo():
             model_choice = gr.Dropdown(visible=False, value= get_model_type(transformer_filename))
         else:
             gr.Markdown("<div class='title-with-lines'><div class=line width=100%></div></div>")
-            model_choice = generate_dropdown_model_list()
+            model_choice = generate_dropdown_model_list(transformer_filename)
         gr.Markdown("<div class='title-with-lines'><div class=line width=100%></div></div>")
         with gr.Row():
             header = gr.Markdown(generate_header(transformer_filename, compile, attention_mode), visible= True)
@@ -5753,7 +5928,7 @@ if __name__ == "__main__":
         server_name = "0.0.0.0"
     if len(server_name) == 0:
         server_name = os.getenv("SERVER_NAME", "localhost")
-    demo = …
+    demo = create_ui()
    if args.open_browser:
        import webbrowser
        if server_name.startswith("http"):
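To make the sliding-window arithmetic that this commit reworks around current_video_length concrete, here is a small worked example with purely illustrative numbers (they are not the repository defaults):

# Worked example of the window-count formula used in generate_video above (illustrative values only).
import math

current_video_length = 193   # frames requested for the whole video
sliding_window_size = 81     # frames produced per window
discard_last_frames = 4      # frames trimmed from the end of each window
reuse_frames = 8             # overlap frames re-fed into the next window

left_after_first_window = current_video_length - sliding_window_size + discard_last_frames   # 116
initial_total_windows = 1 + math.ceil(
    left_after_first_window / (sliding_window_size - discard_last_frames - reuse_frames))    # 1 + ceil(116 / 69)
print(initial_total_windows)  # 3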