Fabrice-TIERCELIN committed on
Commit 2df5040 · verified · 1 Parent(s): 32f84ce
Files changed (1)
  1. app.py +33 -19
app.py CHANGED
@@ -468,8 +468,8 @@ def worker(input_image, end_image, image_position, end_stillness, prompts, n_pro
         return [start_latent, image_encoder_last_hidden_state]
 
     [start_latent, image_encoder_last_hidden_state] = get_start_latent(input_image, height, width, vae, gpu, image_encoder, high_vram)
-    input_image = None
-    end_image = None
+    del input_image
+    del end_image
 
     # Dtype
 
@@ -565,7 +565,7 @@ def worker(input_image, end_image, image_position, end_stillness, prompts, n_pro
     [llama_vec, clip_l_pooler, llama_vec_n, clip_l_pooler_n, llama_attention_mask, llama_attention_mask_n] = prompt_parameters[prompt_index]
 
     if prompt_index < len(prompt_parameters) - 1 or (prompt_index == total_latent_sections - 1):
-        prompt_parameters[prompt_index] = None
+        del prompt_parameters[prompt_index]
 
     if not high_vram:
         unload_complete_models()
@@ -613,6 +613,13 @@ def worker(input_image, end_image, image_position, end_stillness, prompts, n_pro
         clean_latent_4x_indices=clean_latent_4x_indices,
         callback=callback,
     )
+    del clean_latents
+    del clean_latents_2x
+    del clean_latents_4x
+    del latent_indices
+    del clean_latent_indices
+    del clean_latent_2x_indices
+    del clean_latent_4x_indices
 
     [total_generated_latent_frames, history_latents, history_pixels] = post_process(forward, generated_latents, total_generated_latent_frames, history_latents, high_vram, transformer, gpu, vae, history_pixels, latent_window_size, enable_preview, section_index, total_latent_sections, outputs_folder, mp4_crf, stream)
 
@@ -626,7 +633,8 @@ def worker(input_image, end_image, image_position, end_stillness, prompts, n_pro
     real_history_latents = history_latents[:, :, :total_generated_latent_frames, :, :]
     zero_latents = history_latents[:, :, total_generated_latent_frames:, :, :]
     history_latents = torch.cat([zero_latents, real_history_latents], dim=2)
-    real_history_latents = zero_latents = None
+    del real_history_latents
+    del zero_latents
 
     forward = True
     section_index = first_section_index
@@ -754,8 +762,8 @@ def worker_start_end(input_image, end_image, image_position, end_stillness, prom
         return [start_latent, end_latent, image_encoder_last_hidden_state]
 
     [start_latent, end_latent, image_encoder_last_hidden_state] = get_start_latent(input_image, has_end_image, end_image, height, width, vae, gpu, image_encoder, high_vram)
-    input_image = None
-    end_image = None
+    del input_image
+    del end_image
 
     # Dtype
     image_encoder_last_hidden_state = image_encoder_last_hidden_state.to(transformer.dtype)
@@ -905,6 +913,13 @@ def worker_start_end(input_image, end_image, image_position, end_stillness, prom
         clean_latent_4x_indices=clean_latent_4x_indices,
         callback=callback,
     )
+    del clean_latents
+    del clean_latents_2x
+    del clean_latents_4x
+    del latent_indices
+    del clean_latent_indices
+    del clean_latent_2x_indices
+    del clean_latent_4x_indices
 
     [total_generated_latent_frames, history_latents, history_pixels] = post_process(job_id, start_latent, generated_latents, total_generated_latent_frames, history_latents, high_vram, transformer, gpu, vae, history_pixels, latent_window_size, enable_preview, outputs_folder, mp4_crf, stream, is_last_section)
 
@@ -949,7 +964,7 @@ def worker_video(input_video, end_frame, end_stillness, prompts, n_prompt, seed,
 
     # 20250506 pftq: Encode video
    start_latent, input_image_np, video_latents, fps, height, width = video_encode(input_video, resolution, no_resize, vae, vae_batch_size=vae_batch, device=gpu)
-    input_video = None
+    del input_video
     start_latent = start_latent.to(dtype=torch.float32, device=cpu)
     video_latents = video_latents.cpu()
 
@@ -987,7 +1002,7 @@ def worker_video(input_video, end_frame, end_stillness, prompts, n_prompt, seed,
         load_model_as_complete(image_encoder, target_device=gpu)
 
     image_encoder_output = hf_clip_vision_encode(input_image_np, feature_extractor, image_encoder)
-    input_image_np = None
+    del input_image_np
 
     # 20250507 pftq: Process end frame if provided
     if end_frame is not None:
@@ -999,7 +1014,7 @@ def worker_video(input_video, end_frame, end_stillness, prompts, n_prompt, seed,
             end_frame, target_width=width, target_height=height, vae=vae,
             image_encoder=image_encoder, feature_extractor=feature_extractor, device=gpu
         )[0]
-        end_frame = None
+        del end_frame
         end_latent = end_latent.to(dtype=torch.float32, device=cpu)
     else:
         end_latent = None
@@ -1009,7 +1024,7 @@ def worker_video(input_video, end_frame, end_stillness, prompts, n_prompt, seed,
         unload_complete_models(image_encoder, vae)
 
     image_encoder_last_hidden_state = image_encoder_output.last_hidden_state
-    image_encoder_output = None
+    del image_encoder_output
 
     # Dtype
     image_encoder_last_hidden_state = image_encoder_last_hidden_state.to(transformer.dtype)
@@ -1119,8 +1134,7 @@ def worker_video(input_video, end_frame, end_stillness, prompts, n_prompt, seed,
     history_latents = video_latents
     total_generated_latent_frames = history_latents.shape[2]
     # 20250506 pftq: Initialize history_pixels to fix UnboundLocalError
-    history_pixels = None
-    previous_video = None
+    history_pixels = previous_video = None
 
     # 20250509 Generate backwards with end frame for better end frame anchoring
     if total_latent_sections > 4:
@@ -1181,13 +1195,13 @@ def worker_video(input_video, end_frame, end_stillness, prompts, n_prompt, seed,
         clean_latent_4x_indices=clean_latent_4x_indices,
         callback=callback,
     )
-    clean_latents = None
-    clean_latents_2x = None
-    clean_latents_4x = None
-    latent_indices = None
-    clean_latent_indices = None
-    clean_latent_2x_indices = None
-    clean_latent_4x_indices = None
+    del clean_latents
+    del clean_latents_2x
+    del clean_latents_4x
+    del latent_indices
+    del clean_latent_indices
+    del clean_latent_2x_indices
+    del clean_latent_4x_indices
 
     total_generated_latent_frames += int(generated_latents.shape[2])
     history_latents = torch.cat([history_latents, generated_latents.to(history_latents)], dim=2)
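Every hunk in this commit applies the same pattern: once a large intermediate (input images, encoder outputs, latent and index tensors) is no longer needed, its name is dropped with `del` instead of being rebound to `None`, so the object becomes collectable immediately and its memory can be reclaimed. A minimal sketch of the idea, assuming PyTorch and an optional CUDA device; the tensor name and the explicit `gc.collect()` / `empty_cache()` calls below are illustrative and not part of app.py:

```python
import gc
import torch

def generate_and_free():
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # Stand-in for a large intermediate such as a latent history tensor.
    big_latents = torch.zeros(1, 16, 128, 96, 96, device=device)

    result = big_latents.float().mean()

    # Drop the last reference so the tensor is collectable right away.
    # (Rebinding `big_latents = None` frees it too; `del` states the intent
    # and also works on container slots, e.g. `del prompt_parameters[i]`.)
    del big_latents

    # Freeing the Python object returns the memory to PyTorch's caching
    # allocator; empty_cache() hands unused cached blocks back to the driver.
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    return result
```

One behavioral difference worth noting: on a list, `del prompt_parameters[prompt_index]` removes the element and shifts the indices after it, whereas the previous `prompt_parameters[prompt_index] = None` left the list length unchanged.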