rahul7star committed on
Commit 0079199 · verified · 1 Parent(s): f949e2d

Update app.py

Files changed (1):
  1. app.py +114 -59
app.py CHANGED
@@ -1,4 +1,9 @@
 import os
+# Set environment variables before any imports to suppress inductor warnings
+os.environ["TORCHINDUCTOR_CUDA_GRAPHS"] = "0"
+os.environ["TORCHINDUCTOR_MAX_AUTOTUNE_GEMM"] = "0"
+
+# Install dependencies as specified
 os.system('pip install --upgrade --pre --extra-index-url https://download.pytorch.org/whl/nightly/cu126 "torch<2.9" spaces')
 
 import spaces
@@ -13,7 +18,13 @@ import numpy as np
 from PIL import Image
 import random
 import gc
-from optimization import optimize_pipeline_
+
+# Assuming optimize_pipeline_ is a custom function; if not available, define a no-op
+try:
+    from optimization import optimize_pipeline_
+except ImportError:
+    def optimize_pipeline_(pipe, **kwargs):
+        pass  # No-op if optimization is not available
 
 # Model configurations
 T2V_MODEL_ID = "Wan-AI/Wan2.2-T2V-A14B-Diffusers"
@@ -24,62 +35,89 @@ MAX_SEED = np.iinfo(np.int32).max
 FIXED_FPS = 16
 MIN_FRAMES_MODEL = 8
 MAX_FRAMES_MODEL = 81
-MIN_DURATION = round(MIN_FRAMES_MODEL/FIXED_FPS,1)
-MAX_DURATION = round(MAX_FRAMES_MODEL/FIXED_FPS,1)
+MIN_DURATION = round(MIN_FRAMES_MODEL/FIXED_FPS, 1)
+MAX_DURATION = round(MAX_FRAMES_MODEL/FIXED_FPS, 1)
+
+# Cache for pipelines
+t2v_pipe_cache = [None]
+i2v_pipe_cache = [None]
 
-# Initialize T2V pipeline
-vae = AutoencoderKLWan.from_pretrained(T2V_MODEL_ID, subfolder="vae", torch_dtype=torch.float32)
-t2v_pipe = WanPipeline.from_pretrained(T2V_MODEL_ID,
-    transformer=WanTransformer3DModel.from_pretrained('linoyts/Wan2.2-T2V-A14B-Diffusers-BF16',
-        subfolder='transformer',
-        torch_dtype=torch.bfloat16,
-        device_map='cuda',
-    ),
-    transformer_2=WanTransformer3DModel.from_pretrained('linoyts/Wan2.2-T2V-A14B-Diffusers-BF16',
-        subfolder='transformer_2',
-        torch_dtype=torch.bfloat16,
-        device_map='cuda',
-    ),
-    vae=vae,
-    torch_dtype=torch.bfloat16,
-).to('cuda')
+def clear_memory():
+    """Aggressively clear memory and CUDA cache."""
+    for _ in range(3):
+        gc.collect()
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
+            torch.cuda.synchronize()
 
-# Initialize I2V pipeline
-i2v_pipe = WanImageToVideoPipeline.from_pretrained(I2V_MODEL_ID,
-    transformer=WanTransformer3DModel.from_pretrained('cbensimon/Wan2.2-I2V-A14B-bf16-Diffusers',
-        subfolder='transformer',
-        torch_dtype=torch.bfloat16,
-        device_map='cuda',
-    ),
-    transformer_2=WanTransformer3DModel.from_pretrained('cbensimon/Wan2.2-I2V-A14B-bf16-Diffusers',
-        subfolder='transformer_2',
-        torch_dtype=torch.bfloat16,
-        device_map='cuda',
-    ),
-    torch_dtype=torch.bfloat16,
-).to('cuda')
+def load_t2v_pipeline():
+    """Load and optimize the T2V pipeline."""
+    if t2v_pipe_cache[0] is None:
+        vae = AutoencoderKLWan.from_pretrained(T2V_MODEL_ID, subfolder="vae", torch_dtype=torch.float32)
+        t2v_pipe_cache[0] = WanPipeline.from_pretrained(T2V_MODEL_ID,
+            transformer=WanTransformer3DModel.from_pretrained('linoyts/Wan2.2-T2V-A14B-Diffusers-BF16',
+                subfolder='transformer',
+                torch_dtype=torch.bfloat16,
+                device_map='cuda',
+            ),
+            transformer_2=WanTransformer3DModel.from_pretrained('linoyts/Wan2.2-T2V-A14B-Diffusers-BF16',
+                subfolder='transformer_2',
+                torch_dtype=torch.bfloat16,
+                device_map='cuda',
+            ),
+            vae=vae,
+            torch_dtype=torch.bfloat16,
+        ).to('cuda')
+        optimize_pipeline_(t2v_pipe_cache[0],
+            prompt='prompt',
+            height=LANDSCAPE_HEIGHT,
+            width=LANDSCAPE_WIDTH,
+            num_frames=MAX_FRAMES_MODEL,
+        )
+        t2v_pipe_cache[0].enable_model_cpu_offload()  # Enable CPU offload for memory optimization
+        clear_memory()
+    return t2v_pipe_cache[0]
 
-# Memory management
-for i in range(3):
-    gc.collect()
-    torch.cuda.synchronize()
-    torch.cuda.empty_cache()
+def load_i2v_pipeline():
+    """Load and optimize the I2V pipeline."""
+    if i2v_pipe_cache[0] is None:
+        i2v_pipe_cache[0] = WanImageToVideoPipeline.from_pretrained(I2V_MODEL_ID,
+            transformer=WanTransformer3DModel.from_pretrained('cbensimon/Wan2.2-I2V-A14B-bf16-Diffusers',
+                subfolder='transformer',
+                torch_dtype=torch.bfloat16,
+                device_map='cuda',
+            ),
+            transformer_2=WanTransformer3DModel.from_pretrained('cbensimon/Wan2.2-I2V-A14B-bf16-Diffusers',
+                subfolder='transformer_2',
+                torch_dtype=torch.bfloat16,
+                device_map='cuda',
+            ),
+            torch_dtype=torch.bfloat16,
+        ).to('cuda')
+        optimize_pipeline_(i2v_pipe_cache[0],
+            image=Image.new('RGB', (LANDSCAPE_WIDTH, LANDSCAPE_HEIGHT)),
+            prompt='prompt',
+            height=LANDSCAPE_HEIGHT,
+            width=LANDSCAPE_WIDTH,
+            num_frames=MAX_FRAMES_MODEL,
+        )
+        i2v_pipe_cache[0].enable_model_cpu_offload()  # Enable CPU offload for memory optimization
+        clear_memory()
+    return i2v_pipe_cache[0]
 
-# Optimize pipelines
-optimize_pipeline_(t2v_pipe,
-    prompt='prompt',
-    height=LANDSCAPE_HEIGHT,
-    width=LANDSCAPE_WIDTH,
-    num_frames=MAX_FRAMES_MODEL,
-)
+def unload_t2v_pipeline():
+    if t2v_pipe_cache[0] is not None:
+        t2v_pipe_cache[0].to("cpu")
+        # Reset the slot in place; `del t2v_pipe_cache[0]` would empty the list
+        t2v_pipe_cache[0] = None
+        clear_memory()
 
-optimize_pipeline_(i2v_pipe,
-    image=Image.new('RGB', (LANDSCAPE_WIDTH, LANDSCAPE_HEIGHT)),
-    prompt='prompt',
-    height=LANDSCAPE_HEIGHT,
-    width=LANDSCAPE_WIDTH,
-    num_frames=MAX_FRAMES_MODEL,
-)
+def unload_i2v_pipeline():
+    if i2v_pipe_cache[0] is not None:
+        i2v_pipe_cache[0].to("cpu")
+        # Reset the slot in place; `del i2v_pipe_cache[0]` would empty the list
+        i2v_pipe_cache[0] = None
+        clear_memory()
 
 # Default prompts
 default_prompt_t2v = "Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage."
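Note on the unload helpers in the hunk above: `del` on the single cache slot removes the list element itself, leaving an empty list, so a subsequent index assignment raises; rebinding the slot (`cache[0] = None`) is the safe reset. A minimal repro of the pitfall:

    cache = [None]
    cache[0] = object()   # simulate a loaded pipeline
    del cache[0]          # cache is now [], not [None]
    cache[0] = None       # IndexError: list assignment index out of range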
@@ -107,11 +145,23 @@ def resize_image_landscape(image: Image.Image) -> Image.Image:
     image = image.crop((0, top, width, top + new_height))
     return image.resize((LANDSCAPE_WIDTH, LANDSCAPE_HEIGHT), Image.LANCZOS)
 
-def get_duration(*args, **kwargs):
-    steps = kwargs.get('steps', args[2] if len(args) > 2 else 4)
+def get_duration(
+    mode,
+    input_image,
+    prompt,
+    negative_prompt,
+    duration_seconds,
+    guidance_scale,
+    guidance_scale_2,
+    steps,
+    seed,
+    randomize_seed,
+    progress,
+):
     return int(steps) * 15
 
 @spaces.GPU(duration=get_duration)
+@torch.no_grad()
 def generate_video(
     mode,
     input_image,
@@ -125,14 +175,13 @@ def generate_video(
     randomize_seed=False,
     progress=gr.Progress(track_tqdm=True),
 ):
-    if mode == "Image-to-Video" and input_image is None:
-        raise gr.Error("Please upload an input image for Image-to-Video mode.")
-
     num_frames = np.clip(int(round(duration_seconds * FIXED_FPS)), MIN_FRAMES_MODEL, MAX_FRAMES_MODEL)
     current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
 
     if mode == "Text-to-Video":
-        output_frames_list = t2v_pipe(
+        unload_i2v_pipeline()  # Unload I2V to free memory
+        pipe = load_t2v_pipeline()
+        output_frames_list = pipe(
             prompt=prompt,
             negative_prompt=negative_prompt,
             height=LANDSCAPE_HEIGHT,
@@ -144,8 +193,12 @@ def generate_video(
             generator=torch.Generator(device="cuda").manual_seed(current_seed),
         ).frames[0]
     else:  # Image-to-Video
+        unload_t2v_pipeline()  # Unload T2V to free memory
+        pipe = load_i2v_pipeline()
+        if input_image is None:
+            raise gr.Error("Please upload an input image.")
         resized_image = resize_image(input_image)
-        output_frames_list = i2v_pipe(
+        output_frames_list = pipe(
             image=resized_image,
             prompt=prompt,
             negative_prompt=negative_prompt,
@@ -162,6 +215,7 @@ def generate_video(
         video_path = tmpfile.name
 
     export_to_video(output_frames_list, video_path, fps=FIXED_FPS)
+    clear_memory()  # Clean up after generation
     return video_path, current_seed
 
 with gr.Blocks() as demo:
@@ -201,6 +255,7 @@ with gr.Blocks() as demo:
 
     gr.Examples(
         examples=[
+            ["POV selfie video, white cat with sunglasses standing on surfboard, relaxed smile, tropical beach behind (clear water, green hills, blue sky with clouds). Surfboard tips, cat falls into ocean, camera plunges underwater with bubbles and sunlight beams. Brief underwater view of cat’s face, then cat resurfaces, still filming selfie, playful summer vacation mood."],
             ["Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage."],
             ["A cinematic shot of a boat sailing on a calm sea at sunset."],
             ["Drone footage flying over a futuristic city with flying cars."],
 