linoyts HF Staff committed on
Commit
79640f8
·
verified ·
1 Parent(s): 0447ff3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +147 -208
app.py CHANGED
@@ -11,13 +11,17 @@ from qwenimage.pipeline_qwenimage_edit_plus import QwenImageEditPlusPipeline
11
  from qwenimage.transformer_qwenimage import QwenImageTransformer2DModel
12
  from qwenimage.qwen_fa3_processor import QwenDoubleStreamAttnProcessorFA3
13
 
14
-
15
  import math
16
  from huggingface_hub import hf_hub_download
17
  from safetensors.torch import load_file
18
 
19
  import os
 
 
 
 
20
 
 
21
  import tempfile
22
  from PIL import Image
23
  import os
@@ -38,247 +42,182 @@ pipe.load_lora_weights(
38
  "dx8152/Qwen-Edit-2509-Multiple-angles",
39
  weight_name="镜头转换.safetensors", adapter_name="angles"
40
  )
41
- pipe.set_adapters(["angles"], adapter_weights=[1.])
 
 
 
 
 
42
  pipe.fuse_lora(adapter_names=["angles"], lora_scale=1.)
 
43
  pipe.unload_lora_weights()
44
 
45
 
46
- # Apply the same optimizations from the first version
47
  pipe.transformer.__class__ = QwenImageTransformer2DModel
48
  pipe.transformer.set_attn_processor(QwenDoubleStreamAttnProcessorFA3())
49
 
50
- # --- Ahead-of-time compilation ---
51
  optimize_pipeline_(pipe, image=[Image.new("RGB", (1024, 1024)), Image.new("RGB", (1024, 1024))], prompt="prompt")
52
 
53
  # --- UI Constants and Helpers ---
54
  MAX_SEED = np.iinfo(np.int32).max
55
 
56
- def use_output_as_input(output_images):
57
- """Convert output images to input format for the gallery"""
58
- if output_images is None or len(output_images) == 0:
59
- return []
60
- return output_images
61
 
62
- def suggest_next_scene_prompt(images):
63
- pil_images = []
64
- if images is not None:
65
- for item in images:
66
- try:
67
- if isinstance(item[0], Image.Image):
68
- pil_images.append(item[0].convert("RGB"))
69
- elif isinstance(item[0], str):
70
- pil_images.append(Image.open(item[0]).convert("RGB"))
71
- elif hasattr(item, "name"):
72
- pil_images.append(Image.open(item.name).convert("RGB"))
73
- except Exception:
74
- continue
75
- if len(pil_images) > 0:
76
- prompt = next_scene_prompt("", pil_images)
77
- else:
78
- prompt = ""
79
- print("next scene prompt: ", prompt)
80
- return prompt
81
-
82
- # --- Main Inference Function (with hardcoded negative prompt) ---
 
 
 
 
 
 
 
 
83
  @spaces.GPU(duration=300)
84
- def infer(
85
- images,
86
- prompt,
87
- seed=42,
88
- randomize_seed=False,
89
- true_guidance_scale=1.0,
90
- num_inference_steps=4,
91
- height=None,
92
- width=None,
93
- num_images_per_prompt=1,
94
- progress=gr.Progress(track_tqdm=True),
 
 
 
 
95
  ):
96
- """
97
- Generates an image using the local Qwen-Image diffusers pipeline.
98
- """
99
- # Hardcode the negative prompt as requested
100
- negative_prompt = " "
101
-
102
  if randomize_seed:
103
  seed = random.randint(0, MAX_SEED)
104
-
105
- # Set up the generator for reproducibility
106
  generator = torch.Generator(device=device).manual_seed(seed)
107
-
108
- # Load input images into PIL Images
109
  pil_images = []
110
- if images is not None:
111
- for item in images:
112
- try:
113
- if isinstance(item[0], Image.Image):
114
- pil_images.append(item[0].convert("RGB"))
115
- elif isinstance(item[0], str):
116
- pil_images.append(Image.open(item[0]).convert("RGB"))
117
- elif hasattr(item, "name"):
118
- pil_images.append(Image.open(item.name).convert("RGB"))
119
- except Exception:
120
- continue
121
-
122
- if height==256 and width==256:
123
- height, width = None, None
124
- print(f"Calling pipeline with prompt: '{prompt}'")
125
- print(f"Negative Prompt: '{negative_prompt}'")
126
- print(f"Seed: {seed}, Steps: {num_inference_steps}, Guidance: {true_guidance_scale}, Size: {width}x{height}")
127
-
128
-
129
- # Generate the image
130
- image = pipe(
131
- image=pil_images if len(pil_images) > 0 else None,
132
  prompt=prompt,
133
- height=height,
134
- width=width,
135
- negative_prompt=negative_prompt,
136
  num_inference_steps=num_inference_steps,
137
  generator=generator,
138
  true_cfg_scale=true_guidance_scale,
139
- num_images_per_prompt=num_images_per_prompt,
140
- ).images
141
-
142
- # Return images, seed, and make button visible
143
- return image, seed, gr.update(visible=True), gr.update(visible=True)
144
-
145
-
146
- # --- Examples and UI Layout ---
147
- examples = []
148
-
149
- css = """
150
- #col-container {
151
- margin: 0 auto;
152
- max-width: 1024px;
153
- }
154
- #logo-title {
155
- text-align: center;
156
- }
157
- #logo-title img {
158
- width: 400px;
159
- }
160
- #edit_text{margin-top: -62px !important}
161
- """
162
 
163
  with gr.Blocks(css=css) as demo:
164
  with gr.Column(elem_id="col-container"):
165
- gr.HTML("""
166
- <div id="logo-title">
167
- <img src="https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-Image/qwen_image_edit_logo.png" alt="Qwen-Image Edit Logo" width="400" style="display: block; margin: 0 auto;">
168
- <h2 style="font-style: italic;color: #5b47d1;margin-top: -27px !important;margin-left: 96px">Next Scene 🎬</h2>
169
- </div>
170
- """)
171
- gr.Markdown("""
172
- This demo uses the new [Qwen-Image-Edit-2509](https://huggingface.co/Qwen/Qwen-Image-Edit-2509) with [lovis93/next-scene-qwen-image-lora](https://huggingface.co/lovis93/next-scene-qwen-image-lora-2509) for cinematic image sequences with natural visual progression from frame to frame 🎥 and [Phr00t/Qwen-Image-Edit-Rapid-AIO](https://huggingface.co/Phr00t/Qwen-Image-Edit-Rapid-AIO/tree/main) + [AoT compilation & FA3](https://huggingface.co/blog/zerogpu-aoti) for accelerated 4-step inference.
173
- Try on [Qwen Chat](https://chat.qwen.ai/), or [download model](https://huggingface.co/Qwen/Qwen-Image-Edit-2509) to run locally with ComfyUI or diffusers.
174
- """)
175
  with gr.Row():
176
  with gr.Column():
177
- input_images = gr.Gallery(label="Input Images",
178
- show_label=False,
179
- type="pil",
180
- interactive=True)
181
-
182
- prompt = gr.Text(
183
- label="Prompt 🪄",
184
- show_label=True,
185
- placeholder="Next scene: The camera dollies in to a tight close-up...",
186
- )
187
- run_button = gr.Button("Edit!", variant="primary")
188
-
 
189
  with gr.Accordion("Advanced Settings", open=False):
190
-
191
-
192
- seed = gr.Slider(
193
- label="Seed",
194
- minimum=0,
195
- maximum=MAX_SEED,
196
- step=1,
197
- value=0,
198
- )
199
-
200
- randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
201
-
202
- with gr.Row():
203
-
204
- true_guidance_scale = gr.Slider(
205
- label="True guidance scale",
206
- minimum=1.0,
207
- maximum=10.0,
208
- step=0.1,
209
- value=1.0
210
- )
211
-
212
- num_inference_steps = gr.Slider(
213
- label="Number of inference steps",
214
- minimum=1,
215
- maximum=40,
216
- step=1,
217
- value=4,
218
- )
219
-
220
- height = gr.Slider(
221
- label="Height",
222
- minimum=256,
223
- maximum=2048,
224
- step=8,
225
- value=None,
226
- )
227
-
228
- width = gr.Slider(
229
- label="Width",
230
- minimum=256,
231
- maximum=2048,
232
- step=8,
233
- value=None,
234
- )
235
-
236
-
237
-
238
 
239
- with gr.Column():
240
- result = gr.Gallery(label="Result", show_label=False, type="pil")
241
  with gr.Row():
242
- use_output_btn = gr.Button("↗️ Use as input", variant="secondary", size="sm", visible=False)
243
-
244
-
245
-
246
- gr.Examples(examples=[
247
- [["disaster_girl.jpg", "grumpy.png"], "Next Scene: the camera zooms in, showing the cat walking away from the fire"],
248
- [["wednesday.png"], "Next Scene: The camera pulls back and rises to an elevated angle, revealing the full dance floor with the choreographed movements of all dancers as the central figure becomes part of the larger ensemble."],
249
- ],
250
- inputs=[input_images, prompt],
251
- outputs=[result, seed],
252
- fn=infer,
253
- cache_examples="lazy")
254
-
255
-
256
-
257
-
258
- gr.on(
259
- triggers=[run_button.click, prompt.submit],
260
- fn=infer,
261
- inputs=[
262
- input_images,
263
- prompt,
264
- seed,
265
- randomize_seed,
266
- true_guidance_scale,
267
- num_inference_steps,
268
- height,
269
- width,
270
- ],
271
- outputs=[result, seed, use_output_btn],
272
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
273
  )
274
 
275
- # Add the new event handler for the "Use Output as Input" button
276
- use_output_btn.click(
277
- fn=use_output_as_input,
278
- inputs=[result],
279
- outputs=[input_images]
280
  )
281
 
 
 
 
 
 
 
282
 
283
- if __name__ == "__main__":
284
- demo.launch()
 
11
  from qwenimage.transformer_qwenimage import QwenImageTransformer2DModel
12
  from qwenimage.qwen_fa3_processor import QwenDoubleStreamAttnProcessorFA3
13
 
 
14
  import math
15
  from huggingface_hub import hf_hub_download
16
  from safetensors.torch import load_file
17
 
18
  import os
19
+ import base64
20
+ from io import BytesIO
21
+ import json
22
+ import time # Added for history update delay
23
 
24
+ from gradio_client import Client, handle_file
25
  import tempfile
26
  from PIL import Image
27
  import os
 
42
  "dx8152/Qwen-Edit-2509-Multiple-angles",
43
  weight_name="镜头转换.safetensors", adapter_name="angles"
44
  )
45
# Load the second LoRA ("next scene" cinematic motion) next to the
# already-loaded "angles" adapter, activate both, fuse each into the base
# weights, then drop the adapter bookkeeping so inference runs on plain
# fused weights.
pipe.load_lora_weights(
    "lovis93/next-scene-qwen-image-lora-2509",
    weight_name="next-scene_lora-v2-3000.safetensors", adapter_name="next-scene"
)
pipe.set_adapters(["angles", "next-scene"], adapter_weights=[1., 1.])
pipe.fuse_lora(adapter_names=["angles"], lora_scale=1.)
pipe.fuse_lora(adapter_names=["next-scene"], lora_scale=1.)
pipe.unload_lora_weights()

# Swap in the project transformer subclass and its FA3 attention processor.
pipe.transformer.__class__ = QwenImageTransformer2DModel
pipe.transformer.set_attn_processor(QwenDoubleStreamAttnProcessorFA3())

# Ahead-of-time compilation warm-up on representative dummy inputs.
optimize_pipeline_(pipe, image=[Image.new("RGB", (1024, 1024)), Image.new("RGB", (1024, 1024))], prompt="prompt")

# --- UI Constants and Helpers ---
MAX_SEED = np.iinfo(np.int32).max
65
 
66
# --- Build natural language prompt from sliders ---
def build_camera_prompt(rotate_deg, move_lr, move_forward, topdown, wideangle, closeup):
    """Translate the camera-control widgets into a natural-language edit prompt.

    Args:
        rotate_deg: rotation in degrees; positive rotates left, negative right.
        move_lr: lateral move; positive maps to "left", negative to "right".
        move_forward: positive maps to "forward", negative to "backward".
        topdown, wideangle, closeup: lens/perspective toggles.

    Returns:
        A single space-joined instruction string, or "No camera movement."
        when every control sits at its neutral value.
    """
    parts = []

    # Rotation
    if rotate_deg != 0:
        side = "left" if rotate_deg > 0 else "right"
        parts.append(f"Rotate the camera {abs(rotate_deg)} degrees to the {side}.")

    # Movement (zero means no instruction at all)
    if move_lr != 0:
        parts.append("Move the camera left." if move_lr > 0 else "Move the camera right.")
    if move_forward != 0:
        parts.append("Move the camera forward." if move_forward > 0 else "Move the camera backward.")

    # Lens / perspective options, emitted in a fixed order
    for enabled, sentence in (
        (topdown, "Turn the camera to a top-down view."),
        (wideangle, "Turn the camera to a wide-angle lens."),
        (closeup, "Turn the camera to a close-up lens."),
    ):
        if enabled:
            parts.append(sentence)

    text = " ".join(parts).strip()
    return text if text else "No camera movement."
98
# --- Main inference function (unchanged backend) ---
@spaces.GPU(duration=300)
def infer_camera_edit(
    image,
    prev_output,
    rotate_deg,
    move_lr,
    move_forward,
    topdown,
    wideangle,
    closeup,
    seed,
    randomize_seed,
    true_guidance_scale,
    num_inference_steps,
    height,
    width,
):
    """Run one camera-control edit through the Qwen pipeline.

    Builds the text prompt from the camera controls, resolves the source
    image (a fresh upload wins over the previous output), and returns the
    tuple (edited PIL image, seed actually used, prompt actually used).

    Raises:
        gr.Error: when neither an upload nor a previous output is available.
    """
    prompt = build_camera_prompt(rotate_deg, move_lr, move_forward, topdown, wideangle, closeup)
    print(f"Generated Prompt: {prompt}")

    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    generator = torch.Generator(device=device).manual_seed(seed)

    # Prefer a freshly uploaded image; otherwise fall back to the last output.
    sources = []
    if image is not None:
        if isinstance(image, Image.Image):
            sources.append(image.convert("RGB"))
        elif hasattr(image, "name"):  # file-like object from the uploader
            sources.append(Image.open(image.name).convert("RGB"))
    elif prev_output is not None:
        sources.append(prev_output.convert("RGB"))

    if not sources:
        raise gr.Error("Please upload an image first.")

    result = pipe(
        image=sources,
        prompt=prompt,
        # NOTE(review): 0 acts as an "auto size" sentinel here, but the UI
        # sliders bottom out at 256 — confirm 0 can ever reach this point.
        height=height if height != 0 else None,
        width=width if width != 0 else None,
        num_inference_steps=num_inference_steps,
        generator=generator,
        true_cfg_scale=true_guidance_scale,
        num_images_per_prompt=1,
    ).images[0]

    return result, seed, prompt
150
# --- Gradio UI ---
css = '''
#col-container { max-width: 800px; margin: 0 auto; }
'''

with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown("## 🎬 Qwen Image Edit — Camera Angle Control")
        gr.Markdown("Edit the same image from multiple camera angles using Qwen Edit and the 'Multiple Angles' LoRA. Each edit applies to the latest output for fluid camera movement.")

        with gr.Row():
            # Left column: source image plus all camera / sampler controls.
            with gr.Column():
                image = gr.Image(label="Input Image", type="pil", sources=["upload"])
                # Holds the most recent output so successive edits can chain.
                prev_output = gr.State(value=None)

                with gr.Tab("Camera Controls"):
                    rotate_deg = gr.Slider(
                        label="Rotate Left–Right (°)",
                        minimum=-90, maximum=90, step=45, value=0)
                    move_lr = gr.Slider(label="Move Right–Left", minimum=-10, maximum=10, step=1, value=0)
                    move_forward = gr.Slider(label="Move Forward/Backward", minimum=-10, maximum=10, step=1, value=0)
                    topdown = gr.Checkbox(label="Top-Down View", value=False)
                    wideangle = gr.Checkbox(label="Wide-Angle Lens", value=False)
                    closeup = gr.Checkbox(label="Close-Up Lens", value=False)

                with gr.Accordion("Advanced Settings", open=False):
                    seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
                    randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
                    true_guidance_scale = gr.Slider(label="True Guidance Scale", minimum=1.0, maximum=10.0, step=0.1, value=1.0)
                    num_inference_steps = gr.Slider(label="Inference Steps", minimum=1, maximum=40, step=1, value=4)
                    height = gr.Slider(label="Height", minimum=256, maximum=2048, step=8, value=1024)
                    width = gr.Slider(label="Width", minimum=256, maximum=2048, step=8, value=1024)

                with gr.Row():
                    reset_btn = gr.Button("Reset")
                    run_btn = gr.Button("Generate", variant="primary")

            # Right column: result image and the prompt that produced it.
            with gr.Column():
                result = gr.Image(label="Output Image")
                prompt_preview = gr.Textbox(label="Generated Prompt", interactive=False)
                gr.Markdown("_Each change applies a fresh camera instruction to the last output image._")

    # Shared input/output wiring for every trigger of the inference function.
    inputs = [
        image, prev_output, rotate_deg, move_lr, move_forward,
        topdown, wideangle, closeup,
        seed, randomize_seed, true_guidance_scale, num_inference_steps, height, width
    ]
    outputs = [result, seed, prompt_preview]

    def reset_all():
        # Neutral value for each camera control, in listener-output order.
        return [0, 0, 0, False, False, False]

    # NOTE(review): resetting rewrites six controls and each has a .change
    # listener below, so one Reset click can fan out into several inference
    # calls — confirm this is the intended behavior.
    reset_btn.click(
        fn=reset_all,
        inputs=None,
        outputs=[rotate_deg, move_lr, move_forward, topdown, wideangle, closeup],
        queue=False
    )

    run_event = run_btn.click(
        fn=infer_camera_edit,
        inputs=inputs,
        outputs=outputs
    )

    # Live updates on control release
    for ctrl in (rotate_deg, move_lr, move_forward, topdown, wideangle, closeup):
        ctrl.change(fn=infer_camera_edit, inputs=inputs, outputs=outputs, show_progress="minimal")

    # Save latest output as next input (only the image from the output tuple).
    run_event.then(lambda img, *_: img, inputs=outputs, outputs=[prev_output])

demo.launch()