DeepBeepMeep committed · commit 896a1b4 · 1 parent: f0621e1

added multiple ways to extract settings from past video generations
Files changed:
- README.md +4 -0
- ltx_video/pipelines/pipeline_ltx_video.py +1 -1
- wan/text2video.py +19 -6
- wgp.py +219 -44
README.md
CHANGED
@@ -21,6 +21,10 @@ WanGP supports the Wan (and derived models), Hunyuan Video and LTV Video models
 
 
 ## 🔥 Latest News!!
+* May 26 2025: 👋 Wan 2.1GP v5.3: Happy with a video generation and want to run more generations with the same settings, but you can't remember what you used, or copying each setting one by one from the file metadata feels too tedious? Rejoice! There are now multiple ways to turn this chore into a one-click task:
+    - Select a recently generated video in the video gallery and click *Use Selected Video Settings*
+    - Click *Drop File Here* and select a video you saved earlier; if the settings metadata was saved with the video, it will be extracted automatically
+    - Click *Export Settings to File* to save the current settings to your hard drive. You can reuse them later by clicking *Drop File Here* and selecting the settings JSON file
 * May 23 2025: 👋 Wan 2.1GP v5.21: Improvements for Vace: better transitions between Sliding Windows, Support for Image masks in Matanyone, new Extend Video for Vace, different types of automated background removal
 * May 20 2025: 👋 Wan 2.1GP v5.2: Added support for Wan CausVid which is a distilled Wan model that can generate nice looking videos in only 4 to 12 steps.
 The great thing is that Kijai (Kudos to him !) has created a CausVid Lora that can be combined with any existing Wan t2v model 14B like Wan Vace 14B.
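For reference, the one-click settings extraction announced in the README entry above relies on the generation settings being embedded in the saved MP4. Below is a minimal, stand-alone sketch of reading them back outside the UI, assuming the mutagen package and the comment-tag layout used by load_settings_from_file in the wgp.py changes further down; the helper name read_video_settings and the file name are illustrative only.

# Minimal sketch: recover the generation settings WanGP stores in a saved MP4.
# Assumes the mutagen package; mirrors load_settings_from_file later in this commit.
import json
from mutagen.mp4 import MP4

def read_video_settings(path):
    try:
        comment = MP4(path).tags['©cmt'][0]   # settings are stored as JSON in the MP4 comment tag
        return json.loads(comment)
    except Exception:
        return None                           # no settings metadata saved with this video

settings = read_video_settings("output.mp4")  # illustrative file name
if settings is not None:
    print(settings.get("prompt", ""))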
ltx_video/pipelines/pipeline_ltx_video.py
CHANGED
@@ -1113,7 +1113,7 @@ class LTXVideoPipeline(DiffusionPipeline):
                 )
 
                 latent_model_input = (
-                    …
+                    torch.cat([latents] * num_conds) if num_conds > 1 else latents
                 )
                 latent_model_input = self.scheduler.scale_model_input(
                     latent_model_input, t
wan/text2video.py
CHANGED
@@ -124,7 +124,7 @@ class WanT2V:
         reactive = [i * m + 0 * (1 - m) for i, m in zip(frames, masks)]
         inactive = self.vae.encode(inactive, tile_size = tile_size)
         self.toto = inactive[0].clone()
-        if overlapped_latents != None
+        if overlapped_latents != None :
             # inactive[0][:, 0:1] = self.vae.encode([frames[0][:, 0:1]], tile_size = tile_size)[0] # redundant
             inactive[0][:, 1:overlapped_latents.shape[1] + 1] = overlapped_latents
 
@@ -303,6 +303,7 @@ class WanT2V:
         overlapped_latents = None,
         return_latent_slice = None,
         overlap_noise = 0,
+        conditioning_latents_size = 0,
         model_filename = None,
         **bbargs
     ):
@@ -445,8 +446,9 @@ class WanT2V:
         if vace:
             ref_images_count = len(input_ref_images[0]) if input_ref_images != None and input_ref_images[0] != None else 0
             kwargs.update({'vace_context' : z, 'vace_context_scale' : context_scale})
-            if overlapped_latents != None:
+            if overlapped_latents != None :
                 overlapped_latents_size = overlapped_latents.shape[1] + 1
+                # overlapped_latents_size = 3
                 z_reactive = [ zz[0:16, 0:overlapped_latents_size + ref_images_count].clone() for zz in z]
 
 
@@ -456,22 +458,33 @@ class WanT2V:
             self.model.compute_teacache_threshold(self.model.teacache_start_step, timesteps, self.model.teacache_multiplier)
         if callback != None:
             callback(-1, None, True)
+        prev = 50/1000
         for i, t in enumerate(tqdm(timesteps)):
-            …
+
+            timestep = [t]
+            if overlapped_latents != None :
                 # overlap_noise_factor = overlap_noise *(i/(len(timesteps)-1)) / 1000
                 overlap_noise_factor = overlap_noise / 1000
+                # overlap_noise_factor = (1000-t )/ 1000 # overlap_noise / 1000
+                # latent_noise_factor = 1 #max(min(1, (t - overlap_noise) / 1000 ),0)
                 latent_noise_factor = t / 1000
                 for zz, zz_r, ll in zip(z, z_reactive, [latents]):
                     pass
-                    zz[0:16, ref_images_count:overlapped_latents_size + ref_images_count] = zz_r[:, ref_images_count:] * (1.0 - overlap_noise_factor) + torch.randn_like(zz_r[:, ref_images_count:] ) * overlap_noise_factor
-                    ll[:, 0:overlapped_latents_size + ref_images_count] = zz_r * (1.0 - latent_noise_factor) + torch.randn_like(zz_r ) * latent_noise_factor
+                    # zz[0:16, ref_images_count:overlapped_latents_size + ref_images_count] = zz_r[:, ref_images_count:] * (1.0 - overlap_noise_factor) + torch.randn_like(zz_r[:, ref_images_count:] ) * overlap_noise_factor
+                    # ll[:, 0:overlapped_latents_size + ref_images_count] = zz_r * (1.0 - latent_noise_factor) + torch.randn_like(zz_r ) * latent_noise_factor
+
+            if conditioning_latents_size > 0 and overlap_noise > 0:
+                pass
+                overlap_noise_factor = overlap_noise / 1000
+                latents[:, conditioning_latents_size + ref_images_count:] = latents[:, conditioning_latents_size + ref_images_count:] * (1.0 - overlap_noise_factor) + torch.randn_like(latents[:, conditioning_latents_size + ref_images_count:]) * overlap_noise_factor
+                timestep = [torch.tensor([t.item()] * (conditioning_latents_size + ref_images_count) + [t.item() - overlap_noise]*(len(timesteps) - conditioning_latents_size - ref_images_count))]
+
             if target_camera != None:
                 latent_model_input = torch.cat([latents, source_latents], dim=1)
             else:
                 latent_model_input = latents
             kwargs["slg_layers"] = slg_layers if int(slg_start * sampling_steps) <= i < int(slg_end * sampling_steps) else None
 
-            timestep = [t]
             offload.set_step_no_for_lora(self.model, i)
             timestep = torch.stack(timestep)
             kwargs["current_step"] = i
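The new conditioning_latents_size branch above blends every latent frame that follows the conditioning and reference frames with a small amount of Gaussian noise, scaled by overlap_noise / 1000, before each denoising step. Here is a stand-alone sketch of just that blending; the helper name inject_overlap_noise is illustrative, the variable names mirror the diff, and this is not the WanT2V code itself.

# Illustrative sketch of the noise blending introduced above (not the actual WanT2V method).
import torch

def inject_overlap_noise(latents, conditioning_latents_size, ref_images_count, overlap_noise):
    # Mix the frames that come after the conditioning/reference latents with fresh noise.
    factor = overlap_noise / 1000   # same scaling as in the diff
    tail = latents[:, conditioning_latents_size + ref_images_count:]
    latents[:, conditioning_latents_size + ref_images_count:] = tail * (1.0 - factor) + torch.randn_like(tail) * factor
    return latents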
wgp.py
CHANGED
@@ -43,6 +43,7 @@ AUTOSAVE_FILENAME = "queue.zip"
 PROMPT_VARS_MAX = 10
 
 target_mmgp_version = "3.4.7"
+WanGP_version = "5.3"
 prompt_enhancer_image_caption_model, prompt_enhancer_image_caption_processor, prompt_enhancer_llm_model, prompt_enhancer_llm_tokenizer = None, None, None, None
 
 from importlib.metadata import version
@@ -434,6 +435,7 @@ def process_prompt_and_add_tasks(state, model_choice):
                 "prompt" : single_prompt,
                 "image_start": start,
                 "image_end" : end,
+                "video_source": None,
             }
             inputs.update(extra_inputs)
             add_video_task(**inputs)
@@ -1535,7 +1537,7 @@ wan_choices_i2v=["ckpts/wan2.1_image2video_480p_14B_mbf16.safetensors", "ckpts/w
                  "ckpts/wan2.1_fantasy_speaking_14B_bf16.safetensors"]
 ltxv_choices= ["ckpts/ltxv_0.9.7_13B_dev_bf16.safetensors", "ckpts/ltxv_0.9.7_13B_dev_quanto_bf16_int8.safetensors", "ckpts/ltxv_0.9.7_13B_distilled_lora128_bf16.safetensors"]
 
-hunyuan_choices= ["ckpts/hunyuan_video_720_bf16.safetensors", "ckpts/hunyuan_video_720_quanto_int8.safetensors", "ckpts/…
+hunyuan_choices= ["ckpts/hunyuan_video_720_bf16.safetensors", "ckpts/hunyuan_video_720_quanto_int8.safetensors", "ckpts/hunyuan_video_i2v_720_bf16v2.safetensors", "ckpts/hunyuan_video_i2v_720_quanto_int8v2.safetensors",
                   "ckpts/hunyuan_video_custom_720_bf16.safetensors", "ckpts/hunyuan_video_custom_720_quanto_bf16_int8.safetensors" ]
 
 transformer_choices = wan_choices_t2v + wan_choices_i2v + ltxv_choices + hunyuan_choices
@@ -2323,13 +2325,13 @@ def apply_changes( state,
     transformer_dtype_policy = server_config["transformer_dtype_policy"]
     text_encoder_quantization = server_config["text_encoder_quantization"]
     transformer_types = server_config["transformer_types"]
-    …
-    …
+    model_filename = get_model_filename(get_model_type(state["model_filename"]), transformer_quantization, transformer_dtype_policy)
+    state["model_filename"] = model_filename
     if all(change in ["attention_mode", "vae_config", "boost", "save_path", "metadata_type", "clear_file_list", "fit_canvas"] for change in changes ):
         model_choice = gr.Dropdown()
     else:
         reload_needed = True
-        model_choice = generate_dropdown_model_list()
+        model_choice = generate_dropdown_model_list(model_filename)
 
     header = generate_header(state["model_filename"], compile=compile, attention_mode= attention_mode)
     return "<DIV ALIGN=CENTER>The new configuration has been succesfully applied</DIV>", header, model_choice, gr.Row(visible= server_config["enhancer_enabled"] == 1)
@@ -2699,6 +2701,8 @@ def generate_video(
     torch.set_grad_enabled(False)
 
     file_list = gen["file_list"]
+    file_settings_list = gen["file_settings_list"]
+
     prompt_no = gen["prompt_no"]
 
     fit_canvas = server_config.get("fit_canvas", 0)
@@ -2796,7 +2800,8 @@ def generate_video(
     seed = None if seed == -1 else seed
     # negative_prompt = "" # not applicable in the inference
     image2video = test_class_i2v(model_filename)
-    …
+    current_video_length = video_length
+    enable_RIFLEx = RIFLEx_setting == 0 and current_video_length > (6* 16) or RIFLEx_setting == 1
     # VAE Tiling
     device_mem_capacity = torch.cuda.get_device_properties(None).total_memory / 1048576
 
@@ -2849,7 +2854,7 @@ def generate_video(
     source_video = None
     target_camera = None
     if "recam" in model_filename:
-        source_video = preprocess_video("", width=width, height=height,video_in=video_source, max_frames= …
+        source_video = preprocess_video("", width=width, height=height,video_in=video_source, max_frames= current_video_length, start_frame = 0, fit_canvas= fit_canvas == 1)
         target_camera = model_mode
 
     audio_proj_split = None
@@ -2859,8 +2864,8 @@ def generate_video(
         from fantasytalking.infer import parse_audio
         import librosa
         duration = librosa.get_duration(path=audio_guide)
-        …
-        audio_proj_split, audio_context_lens = parse_audio(audio_guide, num_frames= …
+        current_video_length = min(int(fps * duration // 4) * 4 + 5, current_video_length)
+        audio_proj_split, audio_context_lens = parse_audio(audio_guide, num_frames= current_video_length, fps= fps, device= processing_device )
         audio_scale = 1.0
 
     import random
@@ -2884,19 +2889,19 @@ def generate_video(
     else:
         reuse_frames = 0
     if (diffusion_forcing or ltxv) and source_video != None:
-        …
-    sliding_window = (vace or diffusion_forcing or ltxv) and …
+        current_video_length += sliding_window_overlap
+    sliding_window = (vace or diffusion_forcing or ltxv) and current_video_length > sliding_window_size
 
     discard_last_frames = sliding_window_discard_last_frames
-    default_max_frames_to_generate = …
+    default_max_frames_to_generate = current_video_length
     if sliding_window:
-        left_after_first_window = …
+        left_after_first_window = current_video_length - sliding_window_size + discard_last_frames
         initial_total_windows= 1 + math.ceil(left_after_first_window / (sliding_window_size - discard_last_frames - reuse_frames))
-        …
+        current_video_length = sliding_window_size
     else:
         initial_total_windows = 1
 
-    first_window_video_length = …
+    first_window_video_length = current_video_length
     original_prompts = prompts.copy()
     gen["sliding_window"] = sliding_window
     while not abort:
@@ -2917,7 +2922,7 @@ def generate_video(
         window_no = 0
         extra_windows = 0
         guide_start_frame = 0
-        …
+        current_video_length = first_window_video_length
         gen["extra_windows"] = 0
         gen["total_windows"] = 1
         gen["window_no"] = 1
@@ -2967,7 +2972,7 @@ def generate_video(
                     num_frames_generated -= reuse_frames
                 if (max_frames_to_generate - prefix_video_frames_count - num_frames_generated) < latent_size:
                     break
-                …
+                current_video_length = min(sliding_window_size, ((max_frames_to_generate - num_frames_generated - prefix_video_frames_count + reuse_frames + discard_last_frames) // latent_size) * latent_size + 1 )
 
             total_windows = initial_total_windows + extra_windows
             gen["total_windows"] = total_windows
@@ -3015,11 +3020,11 @@ def generate_video(
 
                 if preprocess_type != None :
                     send_cmd("progress", progress_args)
-                    video_guide_copy = preprocess_video(preprocess_type, width=width, height=height,video_in=video_guide, max_frames= …
+                    video_guide_copy = preprocess_video(preprocess_type, width=width, height=height,video_in=video_guide, max_frames= current_video_length if window_no == 1 else current_video_length - reuse_frames, start_frame = guide_start_frame, fit_canvas = fit_canvas, target_fps = fps)
                 keep_frames_parsed, error = parse_keep_frames_video_guide(keep_frames_video_guide, max_frames_to_generate)
                 if len(error) > 0:
                     raise gr.Error(f"invalid keep frames {keep_frames_video_guide}")
-                keep_frames_parsed = keep_frames_parsed[guide_start_frame: guide_start_frame + …
+                keep_frames_parsed = keep_frames_parsed[guide_start_frame: guide_start_frame + current_video_length]
 
                 if window_no == 1:
                     image_size = (height, width) # default frame dimensions until it is set by video_src (if there is any)
@@ -3028,13 +3033,18 @@ def generate_video(
                 src_video, src_mask, src_ref_images = wan_model.prepare_source([video_guide_copy],
                                                                                [video_mask_copy ],
                                                                                [image_refs_copy],
-                                                                               …
+                                                                               current_video_length, image_size = image_size, device ="cpu",
                                                                                original_video= "O" in video_prompt_type,
                                                                                keep_frames=keep_frames_parsed,
                                                                                start_frame = guide_start_frame,
                                                                                pre_src_video = [pre_video_guide],
                                                                                fit_into_canvas = fit_canvas
                                                                                )
+            if window_no == 1:
+                conditioning_latents_size = ( (prefix_video_frames_count-1) // latent_size) + 1 if prefix_video_frames_count > 0 else 0
+            else:
+                conditioning_latents_size = ( (reuse_frames-1) // latent_size) + 1
+
             status = get_latest_status(state)
             gen["progress_status"] = status
             gen["progress_phase"] = ("Encoding Prompt", -1 )
@@ -3062,7 +3072,7 @@ def generate_video(
                     input_masks = src_mask,
                     input_video= pre_video_guide if diffusion_forcing or ltxv else source_video,
                     target_camera= target_camera,
-                    frame_num=(…
+                    frame_num=(current_video_length // latent_size)* latent_size + 1,
                     height = height,
                     width = width,
                     fit_into_canvas = fit_canvas == 1,
@@ -3092,6 +3102,7 @@ def generate_video(
                     overlapped_latents = overlapped_latents,
                     return_latent_slice= return_latent_slice,
                     overlap_noise = sliding_window_overlap_noise,
+                    conditioning_latents_size = conditioning_latents_size,
                     model_filename = model_filename,
                 )
             except Exception as e:
@@ -3160,7 +3171,7 @@ def generate_video(
                 if gen.get("extra_windows",0) > 0:
                     sliding_window = True
                 if sliding_window :
-                    guide_start_frame += …
+                    guide_start_frame += current_video_length
                     if discard_last_frames > 0:
                         sample = sample[: , :-discard_last_frames]
                         guide_start_frame -= discard_last_frames
@@ -3259,7 +3270,7 @@ def generate_video(
                     inputs.pop("task")
                     configs = prepare_inputs_dict("metadata", inputs)
                     configs["prompt"] = "\n".join(original_prompts)
-                    if prompt_enhancer_image_caption_model != None:
+                    if prompt_enhancer_image_caption_model != None and prompt_enhancer !=None and len(prompt_enhancer)>0:
                         configs["enhanced_prompt"] = "\n".join(prompts)
                     configs["generation_time"] = round(end_time-start_time)
                     metadata_choice = server_config.get("metadata_type","metadata")
@@ -3274,6 +3285,8 @@ def generate_video(
 
                     print(f"New video saved to Path: "+video_path)
                     file_list.append(video_path)
+                    file_settings_list.append(configs)
+
                     send_cmd("output")
 
             seed += 1
@@ -3520,6 +3533,7 @@ def process_tasks(state):
     gen = get_gen_info(state)
     clear_file_list = server_config.get("clear_file_list", 0)
     file_list = gen.get("file_list", [])
+    file_settings_list = gen.get("file_settings_list", [])
     if clear_file_list > 0:
         file_list_current_size = len(file_list)
         keep_file_from = max(file_list_current_size - clear_file_list, 0)
@@ -3527,11 +3541,13 @@ def process_tasks(state):
         choice = gen.get("selected",0)
         choice = max(choice- files_removed, 0)
         file_list = file_list[ keep_file_from: ]
+        file_settings_list = file_settings_list[ keep_file_from: ]
     else:
         file_list = []
         choice = 0
     gen["selected"] = choice
     gen["file_list"] = file_list
+    gen["file_settings_list"] = file_settings_list
 
     start_time = time.time()
 
@@ -4005,12 +4021,12 @@ def prepare_inputs_dict(target, inputs ):
 
     if target == "state":
         return inputs
-    unsaved_params = ["image_start", "image_end", "image_refs", "video_guide", "video_source", "video_mask", "audio_guide"…
+    unsaved_params = ["image_start", "image_end", "image_refs", "video_guide", "video_source", "video_mask", "audio_guide"]
     for k in unsaved_params:
        inputs.pop(k)
 
     model_filename = state["model_filename"]
-    inputs["type"] = "WanGP by DeepBeepMeep - " + get_model_name(model_filename)
+    inputs["type"] = f"WanGP v{WanGP_version} by DeepBeepMeep - " + get_model_name(model_filename)
 
     if target == "settings":
         return inputs
@@ -4021,10 +4037,10 @@ def prepare_inputs_dict(target, inputs ):
     if not server_config.get("enhancer_enabled", 0) == 1:
         inputs.pop("prompt_enhancer")
 
-    if not "recam" in model_filename
+    if not "recam" in model_filename and not "diffusion_forcing" in model_filename:
         inputs.pop("model_mode")
 
-    if not "Vace" in model_filename
+    if not "Vace" in model_filename and not "phantom" in model_filename and not "hunyuan_video_custom" in model_filename:
         unsaved_params = ["keep_frames_video_guide", "video_prompt_type", "remove_background_images_ref"]
         for k in unsaved_params:
             inputs.pop(k)
@@ -4035,7 +4051,7 @@ def prepare_inputs_dict(target, inputs ):
             inputs.pop(k)
 
 
-    if not "Vace" in model_filename
+    if not "Vace" in model_filename and not "diffusion_forcing" in model_filename and not "ltxv" in model_filename:
         unsaved_params = [ "sliding_window_size", "sliding_window_overlap", "sliding_window_overlap_noise", "sliding_window_discard_last_frames"]
         for k in unsaved_params:
             inputs.pop(k)
@@ -4043,6 +4059,8 @@ def prepare_inputs_dict(target, inputs ):
     if not "fantasy" in model_filename:
         inputs.pop("audio_guidance_scale")
 
+    if not "hunyuan" in model_filename:
+        inputs.pop("embedded_guidance_scale")
 
     if target == "metadata":
         inputs = {k: v for k,v in inputs.items() if v != None }
@@ -4055,7 +4073,85 @@ def get_function_arguments(func, locals):
     for k in args_names:
         kwargs[k] = locals[k]
     return kwargs
-…
+
+def export_settings(state):
+    model_filename = state["model_filename"]
+    model_type = get_model_type(model_filename)
+    settings = state[model_type]
+    settings["state"] = state
+    settings = prepare_inputs_dict("metadata", settings)
+    settings["model_filename"] = model_filename
+    text = json.dumps(settings, indent=4)
+    text_base64 = base64.b64encode(text.encode('utf8')).decode('utf-8')
+    return text_base64
+
+def use_video_settings(state, files):
+    gen = get_gen_info(state)
+    choice = gen.get("selected",-1)
+    file_list = gen.get("file_list", None)
+    if file_list !=None and choice >=0 and len(file_list)>0:
+        file_settings_list = gen["file_settings_list"]
+        configs = file_settings_list[choice]
+        model_filename = configs["model_filename"]
+        model_type = get_model_type(model_filename)
+        defaults = state.get(model_type, None)
+        defaults = get_default_settings(model_filename) if defaults == None else defaults
+        defaults.update(configs)
+        current_model_filename = state["model_filename"]
+        prompt = configs.get("prompt", "")
+        state[model_type] = defaults
+        gr.Info(f"Settings Loaded from Video with prompt '{prompt[:100]}'")
+        if model_type == get_model_type(current_model_filename):
+            return gr.update(), str(time.time())
+        else:
+            return generate_dropdown_model_list(model_filename), gr.update()
+    else:
+        gr.Info(f"No Video is Selected")
+
+    return gr.update(), gr.update()
+
+def load_settings_from_file(state, file_path):
+    gen = get_gen_info(state)
+    if file_path==None:
+        return gr.update(), gr.update(), None
+
+    configs = None
+    if file_path.endswith(".json"):
+        try:
+            with open(file_path, 'r', encoding='utf-8') as f:
+                configs = json.load(f)
+        except:
+            pass
+    else:
+        from mutagen.mp4 import MP4
+        tags = None
+        try:
+            file = MP4(file_path)
+            tags = file.tags['©cmt'][0]
+        except:
+            pass
+        if tags != None:
+            configs = json.loads(tags)
+    if configs == None:
+        gr.Info("File not supported")
+        return gr.update(), gr.update(), None
+
+    prompt = configs.get("prompt", "")
+    current_model_filename = state["model_filename"]
+    model_filename = configs["model_filename"]
+    model_type = get_model_type(model_filename)
+    defaults = state.get(model_type, None)
+    defaults = get_default_settings(model_filename) if defaults == None else defaults
+    defaults.update(configs)
+    state[model_type]= defaults
+    if tags != None:
+        gr.Info(f"Settings Loaded from Video generated with prompt '{prompt[:100]}'")
+    else:
+        gr.Info(f"Settings Loaded from Settings file with prompt '{prompt[:100]}'")
+    if model_type == get_model_type(current_model_filename):
+        return gr.update(), str(time.time()), None
+    else:
+        return generate_dropdown_model_list(model_filename), gr.update(), None
 
 def save_inputs(
     target,
@@ -4420,7 +4516,7 @@ def generate_video_tab(update_form = False, state_dict = None, ui_defaults = Non
                 if ltxv:
                     model_mode = gr.Dropdown(
                         choices=[
-                        ], …
+                        ], value=None,
                         visible= False
                     )
                 else:
@@ -4470,7 +4566,7 @@ def generate_video_tab(update_form = False, state_dict = None, ui_defaults = Non
                         columns=[3], rows=[1], object_fit="contain", height="auto", selected_index=0, interactive= True, visible="E" in image_prompt_type_value, value= ui_defaults.get("image_end", None))
 
                 video_source = gr.Video(visible=False)
-                model_mode = gr.Dropdown(visible=False)
+                model_mode = gr.Dropdown(value=None, visible=False)
                 keep_frames_video_source = gr.Text(visible=False)
 
             with gr.Column(visible= vace or phantom or hunyuan_video_custom) as video_prompt_column:
@@ -4784,14 +4880,19 @@ def generate_video_tab(update_form = False, state_dict = None, ui_defaults = Non
 
             with gr.Row():
                 save_settings_btn = gr.Button("Set Settings as Default", visible = not args.lock_config)
-                …
+                export_settings_from_file_btn = gr.Button("Export Settings to File", visible = not args.lock_config)
+                use_video_settings_btn = gr.Button("Use Selected Video Settings", visible = not args.lock_config)
+            with gr.Row():
+                settings_file = gr.File(height=41,label="Load Settings From Video / Json")
+                settings_base64_output = gr.Text(interactive= False, visible=False, value = "")
         if not update_form:
             with gr.Column():
                 gen_status = gr.Text(interactive= False, label = "Status")
                 status_trigger = gr.Text(interactive= False, visible=False)
                 output = gr.Gallery( label="Generated videos", show_label=False, elem_id="gallery" , columns=[3], rows=[1], object_fit="contain", height=450, selected_index=0, interactive= False)
                 output_trigger = gr.Text(interactive= False, visible=False)
-                …
+                refresh_form_trigger = gr.Text(interactive= False, visible=False)
+
                 generate_btn = gr.Button("Generate")
                 add_to_queue_btn = gr.Button("Add New Prompt To Queue", visible = False)
 
@@ -4891,6 +4992,8 @@ def generate_video_tab(update_form = False, state_dict = None, ui_defaults = Non
             gen["status_display"] = True
             return time.time()
 
+        start_quit_timer_js, cancel_quit_timer_js, trigger_zip_download_js, trigger_settings_download_js = get_js()
+
         status_trigger.change(refresh_status_async, inputs= [state] , outputs= [gen_status], show_progress_on= [gen_status])
 
         output_trigger.change(refresh_gallery,
@@ -4907,8 +5010,49 @@ def generate_video_tab(update_form = False, state_dict = None, ui_defaults = Non
         locals_dict = locals()
         gen_inputs = [locals_dict[k] for k in inputs_names] + [state]
         save_settings_btn.click( fn=validate_wizard_prompt, inputs =[state, wizard_prompt_activated_var, wizard_variables_var, prompt, wizard_prompt, *prompt_vars] , outputs= [prompt]).then(
-            …
+            save_inputs, inputs =[target_settings] + gen_inputs, outputs = [])
 
+        use_video_settings_btn.click(fn=validate_wizard_prompt,
+            inputs= [state, wizard_prompt_activated_var, wizard_variables_var, prompt, wizard_prompt, *prompt_vars] ,
+            outputs= [prompt]
+        ).then(fn=save_inputs,
+            inputs =[target_state] + gen_inputs,
+            outputs= None
+        ).then( fn=use_video_settings, inputs =[state, output] , outputs= [model_choice, refresh_form_trigger])
+
+        export_settings_from_file_btn.click(fn=validate_wizard_prompt,
+            inputs= [state, wizard_prompt_activated_var, wizard_variables_var, prompt, wizard_prompt, *prompt_vars] ,
+            outputs= [prompt]
+        ).then(fn=save_inputs,
+            inputs =[target_state] + gen_inputs,
+            outputs= None
+        ).then(fn=export_settings,
+            inputs =[state],
+            outputs= [settings_base64_output]
+        ).then(
+            fn=None,
+            inputs=[settings_base64_output],
+            outputs=None,
+            js=trigger_settings_download_js
+        )
+
+        settings_file.upload(fn=validate_wizard_prompt,
+            inputs= [state, wizard_prompt_activated_var, wizard_variables_var, prompt, wizard_prompt, *prompt_vars] ,
+            outputs= [prompt]
+        ).then(fn=save_inputs,
+            inputs =[target_state] + gen_inputs,
+            outputs= None
+        ).then(fn=load_settings_from_file, inputs =[state, settings_file] , outputs= [model_choice, refresh_form_trigger, settings_file])
+
+        refresh_form_trigger.change(fn= fill_inputs,
+            inputs=[state],
+            outputs=gen_inputs + extra_inputs
+        ).then(fn=validate_wizard_prompt,
+            inputs= [state, wizard_prompt_activated_var, wizard_variables_var, prompt, wizard_prompt, *prompt_vars],
+            outputs= [prompt]
+        )
 
         model_choice.change(fn=validate_wizard_prompt,
             inputs= [state, wizard_prompt_activated_var, wizard_variables_var, prompt, wizard_prompt, *prompt_vars] ,
@@ -4916,10 +5060,10 @@ def generate_video_tab(update_form = False, state_dict = None, ui_defaults = Non
         ).then(fn=save_inputs,
             inputs =[target_state] + gen_inputs,
             outputs= None
-        …
+        ).then(fn= change_model,
             inputs=[state, model_choice],
             outputs= [header]
-        …
+        ).then(fn= fill_inputs,
             inputs=[state],
             outputs=gen_inputs + extra_inputs
         ).then(fn= preload_model_when_switching,
@@ -4988,7 +5132,6 @@ def generate_video_tab(update_form = False, state_dict = None, ui_defaults = Non
         )
 
 
-        start_quit_timer_js, cancel_quit_timer_js, trigger_zip_download_js = get_timer_js()
 
         single_hidden_trigger_btn.click(
             fn=show_countdown_info_from_state,
@@ -5348,9 +5491,9 @@ def generate_info_tab():
     gr.Markdown("Please note that if your turn on compilation, the first denoising step of the first video generation will be slow due to the compilation. Therefore all your tests should be done with compilation turned off.")
 
 
-def generate_dropdown_model_list():
+def generate_dropdown_model_list(model_filename):
     dropdown_types= transformer_types if len(transformer_types) > 0 else model_types
-    current_model_type = get_model_type(…
+    current_model_type = get_model_type(model_filename)
     if current_model_type not in dropdown_types:
         dropdown_types.append(current_model_type)
     model_list = []
@@ -5385,7 +5528,7 @@ def select_tab(tab_state, evt:gr.SelectData):
     tab_state["tab_no"] = new_tab_no
     return gr.Tabs()
 
-def …
+def get_js():
     start_quit_timer_js = """
     () => {
         function findAndClickGradioButton(elemId) {
@@ -5454,9 +5597,41 @@ def get_timer_js():
            }
        }
    """
-    return start_quit_timer_js, cancel_quit_timer_js, trigger_zip_download_js
 
-…
+    trigger_settings_download_js = """
+    (base64String) => {
+        if (!base64String) {
+          console.log("No base64 settings data received, skipping download.");
+          return;
+        }
+        try {
+            const byteCharacters = atob(base64String);
+            const byteNumbers = new Array(byteCharacters.length);
+            for (let i = 0; i < byteCharacters.length; i++) {
+                byteNumbers[i] = byteCharacters.charCodeAt(i);
+            }
+            const byteArray = new Uint8Array(byteNumbers);
+            const blob = new Blob([byteArray], { type: 'application/text' });
+
+            const url = URL.createObjectURL(blob);
+            const a = document.createElement('a');
+            a.style.display = 'none';
+            a.href = url;
+            a.download = 'settings.json';
+            document.body.appendChild(a);
+            a.click();
+
+            window.URL.revokeObjectURL(url);
+            document.body.removeChild(a);
+            console.log("settings download triggered.");
+        } catch (e) {
+            console.error("Error processing base64 data or triggering download:", e);
+        }
+    }
+    """
+    return start_quit_timer_js, cancel_quit_timer_js, trigger_zip_download_js, trigger_settings_download_js
+
+def create_ui():
     global vmc_event_handler
     css = """
     #model_list{
@@ -5699,7 +5874,7 @@ def create_demo():
     theme = gr.themes.Soft(font=["Verdana"], primary_hue="sky", neutral_hue="slate", text_size="md")
 
     with gr.Blocks(css=css, theme=theme, title= "WanGP") as main:
-        gr.Markdown("<div align=center><H1>Wan<SUP>GP</SUP>…
+        gr.Markdown(f"<div align=center><H1>Wan<SUP>GP</SUP> v{WanGP_version} <FONT SIZE=4>by <I>DeepBeepMeep</I></FONT> <FONT SIZE=3>") # (<A HREF='https://github.com/deepbeepmeep/Wan2GP'>Updates</A>)</FONT SIZE=3></H1></div>")
         global model_list
 
         tab_state = gr.State({ "tab_no":0 })
@@ -5712,7 +5887,7 @@ def create_demo():
             model_choice = gr.Dropdown(visible=False, value= get_model_type(transformer_filename))
         else:
             gr.Markdown("<div class='title-with-lines'><div class=line width=100%></div></div>")
-            model_choice = generate_dropdown_model_list()
+            model_choice = generate_dropdown_model_list(transformer_filename)
         gr.Markdown("<div class='title-with-lines'><div class=line width=100%></div></div>")
         with gr.Row():
             header = gr.Markdown(generate_header(transformer_filename, compile, attention_mode), visible= True)
@@ -5753,7 +5928,7 @@ if __name__ == "__main__":
         server_name = "0.0.0.0"
     if len(server_name) == 0:
         server_name = os.getenv("SERVER_NAME", "localhost")
-    demo = …
+    demo = create_ui()
    if args.open_browser:
        import webbrowser
        if server_name.startswith("http"):
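To make the sliding-window arithmetic that this commit reworks around current_video_length concrete, here is a small worked example with purely illustrative numbers (they are not the repository defaults):

# Worked example of the window-count formula used in generate_video above (illustrative values only).
import math

current_video_length = 193   # frames requested for the whole video
sliding_window_size = 81     # frames produced per window
discard_last_frames = 4      # frames trimmed from the end of each window
reuse_frames = 8             # overlap frames re-fed into the next window

left_after_first_window = current_video_length - sliding_window_size + discard_last_frames   # 116
initial_total_windows = 1 + math.ceil(
    left_after_first_window / (sliding_window_size - discard_last_frames - reuse_frames))    # 1 + ceil(116 / 69)
print(initial_total_windows)  # 3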