Ephemeral182 committed on
Commit
359c460
·
verified ·
1 Parent(s): 6a36790

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +134 -160
app.py CHANGED
@@ -18,21 +18,17 @@ DEFAULT_CUSTOM_WEIGHTS_PATH = "PosterCraft/PosterCraft-v1_RL"
18
  MAX_SEED = np.iinfo(np.int32).max
19
  MAX_IMAGE_SIZE = 2048
20
 
21
- # No need to manually set CUDA device on Spaces
22
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
23
- torch_dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32
24
-
25
  logging.basicConfig(
26
  level=logging.INFO,
27
  format="%(asctime)s - %(levelname)s - %(message)s",
28
  )
29
 
30
  # ------------------------------------------------------------------
31
- # 2. Model Download Function (Referencing JarvisIR's approach)
32
  # ------------------------------------------------------------------
33
  def download_model_weights(target_dir, repo_id, subdir=None):
34
  """
35
- Download model weights to specified directory
36
 
37
  Args:
38
  target_dir (str): Local target directory
@@ -51,7 +47,6 @@ def download_model_weights(target_dir, repo_id, subdir=None):
51
 
52
  try:
53
  if subdir:
54
- # If subdirectory is specified, only download that subdirectory
55
  snapshot_download(
56
  repo_id=repo_id,
57
  repo_type="model",
@@ -61,7 +56,6 @@ def download_model_weights(target_dir, repo_id, subdir=None):
61
  )
62
  src_dir = os.path.join(tmp_dir, subdir)
63
  else:
64
- # Download entire repository
65
  snapshot_download(
66
  repo_id=repo_id,
67
  repo_type="model",
@@ -70,7 +64,6 @@ def download_model_weights(target_dir, repo_id, subdir=None):
70
  )
71
  src_dir = tmp_dir
72
 
73
- # Copy to target directory
74
  if os.path.exists(src_dir):
75
  shutil.copytree(src_dir, target_dir)
76
  logging.info(f"Successfully downloaded {repo_id} to {target_dir}")
@@ -80,22 +73,50 @@ def download_model_weights(target_dir, repo_id, subdir=None):
80
  except Exception as e:
81
  logging.error(f"Download failed: {e}")
82
  finally:
83
- # Clean up temporary directory
84
  if os.path.exists(tmp_dir):
85
  shutil.rmtree(tmp_dir)
86
 
87
  # ------------------------------------------------------------------
88
- # 3. Qwen Prompt Rewriting Agent
89
  # ------------------------------------------------------------------
90
- class QwenRecapAgent:
91
- def __init__(self, model_path: str):
92
- self.model_path = model_path
93
- self.tokenizer = None
94
- self.model = None
95
- self.is_loaded = False
96
-
97
- self.prompt_template = (
98
- """You are an expert poster prompt designer. Your task is to rewrite a user's short poster prompt into a detailed and vivid long-format prompt. Follow these steps carefully:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
  **Step 1: Analyze the Core Requirements**
100
  Identify the key elements in the user's prompt. Do not miss any details.
101
  - **Subject:** What is the main subject? (e.g., a person, an object, a scene)
@@ -103,12 +124,14 @@ Identify the key elements in the user's prompt. Do not miss any details.
103
  - **Text:** Is there any text, like a title or slogan?
104
  - **Color Palette:** Are there specific colors mentioned?
105
  - **Composition:** Are there any layout instructions?
 
106
  **Step 2: Expand and Add Detail**
107
  Elaborate on each core requirement to create a rich description.
108
  - **Do Not Omit:** You must include every piece of information from the original prompt.
109
  - **Enrich with Specifics:** Add professional and descriptive details.
110
  - **Example:** If the user says "a woman with a bow", you could describe her as "a young woman with a determined expression, holding a finely crafted wooden longbow, with an arrow nocked and ready to fire."
111
  - **Fill in the Gaps:** If the original prompt is simple (e.g., "a poster for a coffee shop"), use your creativity to add fitting details. You might add "The poster features a top-down view of a steaming latte with delicate art on its foam, placed on a rustic wooden table next to a few scattered coffee beans."
 
112
  **Step 3: Handle Text Precisely**
113
  - **Identify All Text Elements:** Carefully look for any text mentioned in the prompt. This includes:
114
  - **Explicit Text:** Subtitles, slogans, or any text in quotes.
@@ -121,142 +144,44 @@ Elaborate on each core requirement to create a rich description.
121
  - **If No Text Exists:**
122
  - Do not add any text elements. The poster must be purely visual.
123
  - Most posters have titles. When a title exists, you must extend the title's description. Only when you are absolutely sure that there is no text to render, you can allow the extended prompt not to render text.
 
124
  **Step 4: Final Output Rules**
125
  - **Output ONLY the rewritten prompt.** No introductions, no explanations, no "Here is the prompt:".
126
  - **Use a descriptive and confident tone.** Write as if you are describing a finished, beautiful poster.
127
  - **Keep it concise.** The final prompt should be under 300 words.
 
128
  ---
129
  **User Prompt:**
130
  {brief_description}"""
131
- )
132
-
133
- def _load_model(self):
134
- """Lazy load model"""
135
- if not self.is_loaded:
136
- logging.info(f"Loading Qwen model: {self.model_path}")
137
-
138
- # Ensure model files exist, if not download from Hub
139
- if not os.path.exists(self.model_path):
140
- download_model_weights(self.model_path, DEFAULT_QWEN_MODEL_PATH)
141
-
142
- self.tokenizer = AutoTokenizer.from_pretrained(self.model_path)
143
- self.model = AutoModelForCausalLM.from_pretrained(
144
- self.model_path,
145
- torch_dtype=torch_dtype,
146
- device_map="auto"
147
- )
148
- self.is_loaded = True
149
-
150
- def recap(self, text: str) -> str:
151
- try:
152
- self._load_model()
153
-
154
- messages = [
155
- {"role": "user", "content": self.prompt_template.format(brief_description=text)}
156
- ]
157
- chat = self.tokenizer.apply_chat_template(
158
- messages, tokenize=False, add_generation_prompt=True, enable_thinking=False
159
- )
160
- inputs = self.tokenizer([chat], return_tensors="pt").to(self.model.device)
161
-
162
- with torch.no_grad():
163
- ids = self.model.generate(
164
- **inputs, max_new_tokens=1024, temperature=0.6, do_sample=True
165
- )
166
- out = self.tokenizer.decode(
167
- ids[0][len(inputs.input_ids[0]):], skip_special_tokens=True
168
- ).strip()
169
-
170
- if "</think>" in out:
171
- out = out.split("</think>")[-1].strip()
172
- return out or text
173
- except Exception as e:
174
- logging.warning(f"Recap failed: {e}. Using original prompt.")
175
- return text
176
-
177
- # ------------------------------------------------------------------
178
- # 4. Poster Generator
179
- # ------------------------------------------------------------------
180
- class PosterGenerator:
181
- def __init__(self):
182
- self.pipeline = None
183
- self.qwen = None
184
- self.is_loaded = False
185
-
186
- def _load_models(self):
187
- """Lazy load all models"""
188
- if not self.is_loaded:
189
- logging.info("Starting model loading...")
190
-
191
- # Download custom weights (if not exists)
192
- custom_weights_local = "local_weights/PosterCraft-v1_RL"
193
- if not os.path.exists(custom_weights_local):
194
- logging.info("Downloading custom Transformer weights...")
195
- download_model_weights(custom_weights_local, DEFAULT_CUSTOM_WEIGHTS_PATH)
196
-
197
- # Load FLUX pipeline
198
- logging.info("Loading FLUX pipeline...")
199
- self.pipeline = FluxPipeline.from_pretrained(
200
- DEFAULT_PIPELINE_PATH,
201
- torch_dtype=torch_dtype
202
- )
203
-
204
- # Load custom Transformer
205
- if os.path.exists(custom_weights_local):
206
- try:
207
- logging.info("Loading custom Transformer...")
208
- transformer = FluxTransformer2DModel.from_pretrained(
209
- custom_weights_local,
210
- torch_dtype=torch_dtype
211
- )
212
- self.pipeline.transformer = transformer
213
- logging.info("Custom Transformer loaded successfully")
214
- except Exception as e:
215
- logging.warning(f"Custom weights loading failed: {e}, using default weights")
216
-
217
- # Enable memory optimization
218
- self.pipeline.enable_model_cpu_offload()
219
-
220
- # Initialize Qwen (lazy loading)
221
- qwen_local = "local_weights/Qwen3-8B"
222
- if not os.path.exists(qwen_local):
223
- logging.info("Downloading Qwen model...")
224
- download_model_weights(qwen_local, DEFAULT_QWEN_MODEL_PATH)
225
-
226
- self.qwen = QwenRecapAgent(qwen_local)
227
-
228
- self.is_loaded = True
229
- logging.info("All models loaded successfully")
230
 
231
- def generate(self, prompt, enable_recap, **kwargs):
232
- """Generate poster with given parameters"""
233
- final_prompt = prompt
234
- if enable_recap:
235
- if not self.qwen:
236
- raise gr.Error("Recap is enabled, but the recap model is not available. Check model path.")
237
- final_prompt = self.qwen.recap(prompt)
238
-
239
- generator = torch.Generator(device="cpu").manual_seed(kwargs['seed'])
240
 
241
- with torch.inference_mode():
242
- image = self.pipeline(
243
- prompt=final_prompt,
244
- generator=generator,
245
- num_inference_steps=kwargs['num_inference_steps'],
246
- guidance_scale=kwargs['guidance_scale'],
247
- width=kwargs['width'],
248
- height=kwargs['height']
249
- ).images[0]
250
 
251
- return image, final_prompt
252
-
253
- # Global instance
254
- poster_gen = PosterGenerator()
 
 
255
 
256
  # ------------------------------------------------------------------
257
  # 5. ZeroGPU Inference Function
258
  # ------------------------------------------------------------------
259
- @spaces.GPU(duration=120)
260
  def generate_image_interface(
261
  original_prompt, enable_recap, height, width,
262
  num_inference_steps, guidance_scale, seed_input,
@@ -268,45 +193,94 @@ def generate_image_interface(
268
  if width > MAX_IMAGE_SIZE or height > MAX_IMAGE_SIZE:
269
  raise gr.Error(f"Maximum resolution limit is {MAX_IMAGE_SIZE}×{MAX_IMAGE_SIZE}")
270
 
271
- progress(0, desc="Loading models...")
272
-
273
  try:
274
  actual_seed = int(seed_input) if seed_input and seed_input > 0 else random.randint(1, 2**32 - 1)
275
 
276
- # Ensure models are loaded
277
- poster_gen._load_models()
278
 
279
- image, final_prompt = poster_gen.generate(
280
- prompt=original_prompt,
281
- enable_recap=enable_recap,
282
- height=int(height),
283
- width=int(width),
284
- num_inference_steps=int(num_inference_steps),
285
- guidance_scale=float(guidance_scale),
286
- seed=actual_seed
287
  )
288
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
289
  status_log = f"Seed: {actual_seed} | Generation complete."
290
- progress(1, desc="Generation complete!")
291
  return image, final_prompt, status_log
292
 
293
  except Exception as e:
294
  logging.error(f"Generation failed: {e}")
295
- raise gr.Error(f"An error occurred: {e}")
296
 
297
  # ------------------------------------------------------------------
298
- # 6. Gradio Interface (Similar to demo format)
299
  # ------------------------------------------------------------------
300
  with gr.Blocks(theme=gr.themes.Soft(), title="PosterCraft") as demo:
301
  gr.Markdown("# PosterCraft-v1.0")
302
- gr.Markdown(f"Running on: **{device}** | Base Pipeline: **{DEFAULT_PIPELINE_PATH}**")
303
  gr.Markdown("⚠️ **First use requires model download, please wait about 10-15 minutes**")
304
 
305
  with gr.Row():
306
  with gr.Column(scale=1):
307
  gr.Markdown("### 1. Configuration")
308
  prompt_input = gr.Textbox(label="Prompt", lines=3, placeholder="Enter your creative prompt...")
309
- enable_recap_checkbox = gr.Checkbox(label="Enable Prompt Recap", value=True, info=f"Uses {DEFAULT_QWEN_MODEL_PATH} for rewriting.")
310
 
311
  with gr.Row():
312
  width_input = gr.Slider(label="Width", minimum=256, maximum=2048, value=832, step=64)
@@ -333,4 +307,4 @@ with gr.Blocks(theme=gr.themes.Soft(), title="PosterCraft") as demo:
333
  generate_button.click(fn=generate_image_interface, inputs=inputs_list, outputs=outputs_list)
334
 
335
  if __name__ == "__main__":
336
- demo.launch()
 
18
  MAX_SEED = np.iinfo(np.int32).max
19
  MAX_IMAGE_SIZE = 2048
20
 
 
 
 
 
21
  logging.basicConfig(
22
  level=logging.INFO,
23
  format="%(asctime)s - %(levelname)s - %(message)s",
24
  )
25
 
26
  # ------------------------------------------------------------------
27
+ # 2. Model Download Function (CPU only)
28
  # ------------------------------------------------------------------
29
  def download_model_weights(target_dir, repo_id, subdir=None):
30
  """
31
+ Download model weights to specified directory (CPU operation)
32
 
33
  Args:
34
  target_dir (str): Local target directory
 
47
 
48
  try:
49
  if subdir:
 
50
  snapshot_download(
51
  repo_id=repo_id,
52
  repo_type="model",
 
56
  )
57
  src_dir = os.path.join(tmp_dir, subdir)
58
  else:
 
59
  snapshot_download(
60
  repo_id=repo_id,
61
  repo_type="model",
 
64
  )
65
  src_dir = tmp_dir
66
 
 
67
  if os.path.exists(src_dir):
68
  shutil.copytree(src_dir, target_dir)
69
  logging.info(f"Successfully downloaded {repo_id} to {target_dir}")
 
73
  except Exception as e:
74
  logging.error(f"Download failed: {e}")
75
  finally:
 
76
  if os.path.exists(tmp_dir):
77
  shutil.rmtree(tmp_dir)
78
 
79
  # ------------------------------------------------------------------
80
+ # 3. Pre-download models (CPU operation)
81
  # ------------------------------------------------------------------
82
+ def ensure_models_downloaded():
83
+ """Pre-download all models to avoid GPU timeout"""
84
+ logging.info("Checking and downloading models if needed...")
85
+
86
+ # Download custom weights
87
+ custom_weights_local = "local_weights/PosterCraft-v1_RL"
88
+ if not os.path.exists(custom_weights_local):
89
+ logging.info("Downloading custom Transformer weights...")
90
+ download_model_weights(custom_weights_local, DEFAULT_CUSTOM_WEIGHTS_PATH)
91
+
92
+ # Download Qwen model
93
+ qwen_local = "local_weights/Qwen3-8B"
94
+ if not os.path.exists(qwen_local):
95
+ logging.info("Downloading Qwen model...")
96
+ download_model_weights(qwen_local, DEFAULT_QWEN_MODEL_PATH)
97
+
98
+ logging.info("Model download check completed")
99
+
100
+ # Pre-download models at startup (CPU)
101
+ ensure_models_downloaded()
102
+
103
+ # ------------------------------------------------------------------
104
+ # 4. Qwen Prompt Rewriting Agent
105
+ # ------------------------------------------------------------------
106
+ def create_qwen_agent(model_path):
107
+ """Create Qwen agent inside GPU context"""
108
+ tokenizer = AutoTokenizer.from_pretrained(model_path)
109
+ model = AutoModelForCausalLM.from_pretrained(
110
+ model_path,
111
+ torch_dtype=torch.bfloat16,
112
+ device_map="auto"
113
+ )
114
+ return tokenizer, model
115
+
116
+ def recap_prompt(tokenizer, model, text):
117
+ """Recap prompt using Qwen model"""
118
+ prompt_template = """You are an expert poster prompt designer. Your task is to rewrite a user's short poster prompt into a detailed and vivid long-format prompt. Follow these steps carefully:
119
+
120
  **Step 1: Analyze the Core Requirements**
121
  Identify the key elements in the user's prompt. Do not miss any details.
122
  - **Subject:** What is the main subject? (e.g., a person, an object, a scene)
 
124
  - **Text:** Is there any text, like a title or slogan?
125
  - **Color Palette:** Are there specific colors mentioned?
126
  - **Composition:** Are there any layout instructions?
127
+
128
  **Step 2: Expand and Add Detail**
129
  Elaborate on each core requirement to create a rich description.
130
  - **Do Not Omit:** You must include every piece of information from the original prompt.
131
  - **Enrich with Specifics:** Add professional and descriptive details.
132
  - **Example:** If the user says "a woman with a bow", you could describe her as "a young woman with a determined expression, holding a finely crafted wooden longbow, with an arrow nocked and ready to fire."
133
  - **Fill in the Gaps:** If the original prompt is simple (e.g., "a poster for a coffee shop"), use your creativity to add fitting details. You might add "The poster features a top-down view of a steaming latte with delicate art on its foam, placed on a rustic wooden table next to a few scattered coffee beans."
134
+
135
  **Step 3: Handle Text Precisely**
136
  - **Identify All Text Elements:** Carefully look for any text mentioned in the prompt. This includes:
137
  - **Explicit Text:** Subtitles, slogans, or any text in quotes.
 
144
  - **If No Text Exists:**
145
  - Do not add any text elements. The poster must be purely visual.
146
  - Most posters have titles. When a title exists, you must extend the title's description. Only when you are absolutely sure that there is no text to render, you can allow the extended prompt not to render text.
147
+
148
  **Step 4: Final Output Rules**
149
  - **Output ONLY the rewritten prompt.** No introductions, no explanations, no "Here is the prompt:".
150
  - **Use a descriptive and confident tone.** Write as if you are describing a finished, beautiful poster.
151
  - **Keep it concise.** The final prompt should be under 300 words.
152
+
153
  ---
154
  **User Prompt:**
155
  {brief_description}"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
156
 
157
+ try:
158
+ messages = [
159
+ {"role": "user", "content": prompt_template.format(brief_description=text)}
160
+ ]
161
+ chat = tokenizer.apply_chat_template(
162
+ messages, tokenize=False, add_generation_prompt=True, enable_thinking=False
163
+ )
164
+ inputs = tokenizer([chat], return_tensors="pt").to(model.device)
 
165
 
166
+ with torch.no_grad():
167
+ ids = model.generate(
168
+ **inputs, max_new_tokens=1024, temperature=0.6, do_sample=True
169
+ )
170
+ out = tokenizer.decode(
171
+ ids[0][len(inputs.input_ids[0]):], skip_special_tokens=True
172
+ ).strip()
 
 
173
 
174
+ if "</think>" in out:
175
+ out = out.split("</think>")[-1].strip()
176
+ return out or text
177
+ except Exception as e:
178
+ logging.warning(f"Recap failed: {e}. Using original prompt.")
179
+ return text
180
 
181
  # ------------------------------------------------------------------
182
  # 5. ZeroGPU Inference Function
183
  # ------------------------------------------------------------------
184
+ @spaces.GPU(duration=300) # Increased to 5 minutes to give model loading more time
185
  def generate_image_interface(
186
  original_prompt, enable_recap, height, width,
187
  num_inference_steps, guidance_scale, seed_input,
 
193
  if width > MAX_IMAGE_SIZE or height > MAX_IMAGE_SIZE:
194
  raise gr.Error(f"Maximum resolution limit is {MAX_IMAGE_SIZE}×{MAX_IMAGE_SIZE}")
195
 
 
 
196
  try:
197
  actual_seed = int(seed_input) if seed_input and seed_input > 0 else random.randint(1, 2**32 - 1)
198
 
199
+ progress(0.1, desc="Loading FLUX pipeline...")
 
200
 
201
+ # Load FLUX pipeline
202
+ pipeline = FluxPipeline.from_pretrained(
203
+ DEFAULT_PIPELINE_PATH,
204
+ torch_dtype=torch.bfloat16
 
 
 
 
205
  )
206
 
207
+ progress(0.2, desc="Loading custom transformer...")
208
+
209
+ # Load custom transformer if available
210
+ custom_weights_local = "local_weights/PosterCraft-v1_RL"
211
+ if os.path.exists(custom_weights_local):
212
+ try:
213
+ transformer = FluxTransformer2DModel.from_pretrained(
214
+ custom_weights_local,
215
+ torch_dtype=torch.bfloat16
216
+ )
217
+ pipeline.transformer = transformer
218
+ logging.info("Custom Transformer loaded successfully")
219
+ except Exception as e:
220
+ logging.warning(f"Custom weights loading failed: {e}, using default weights")
221
+
222
+ # Move pipeline to GPU
223
+ pipeline = pipeline.to("cuda")
224
+
225
+ final_prompt = original_prompt
226
+
227
+ if enable_recap:
228
+ progress(0.4, desc="Loading Qwen model for prompt enhancement...")
229
+
230
+ qwen_local = "local_weights/Qwen3-8B"
231
+ if os.path.exists(qwen_local):
232
+ try:
233
+ tokenizer, model = create_qwen_agent(qwen_local)
234
+ final_prompt = recap_prompt(tokenizer, model, original_prompt)
235
+ progress(0.6, desc="Prompt enhanced, starting generation...")
236
+
237
+ # Clean up Qwen model to free memory
238
+ del tokenizer, model
239
+ torch.cuda.empty_cache()
240
+ except Exception as e:
241
+ logging.warning(f"Qwen model failed: {e}, using original prompt")
242
+ final_prompt = original_prompt
243
+ else:
244
+ logging.warning("Qwen model not found, using original prompt")
245
+ final_prompt = original_prompt
246
+
247
+ progress(0.7, desc="Generating image...")
248
+
249
+ # Generate image
250
+ generator = torch.Generator(device="cuda").manual_seed(actual_seed)
251
+
252
+ with torch.inference_mode():
253
+ image = pipeline(
254
+ prompt=final_prompt,
255
+ generator=generator,
256
+ num_inference_steps=int(num_inference_steps),
257
+ guidance_scale=float(guidance_scale),
258
+ width=int(width),
259
+ height=int(height)
260
+ ).images[0]
261
+
262
+ progress(1.0, desc="Generation complete!")
263
+
264
  status_log = f"Seed: {actual_seed} | Generation complete."
 
265
  return image, final_prompt, status_log
266
 
267
  except Exception as e:
268
  logging.error(f"Generation failed: {e}")
269
+ raise gr.Error(f"An error occurred: {str(e)}")
270
 
271
  # ------------------------------------------------------------------
272
+ # 6. Gradio Interface
273
  # ------------------------------------------------------------------
274
  with gr.Blocks(theme=gr.themes.Soft(), title="PosterCraft") as demo:
275
  gr.Markdown("# PosterCraft-v1.0")
276
+ gr.Markdown(f"Base Pipeline: **{DEFAULT_PIPELINE_PATH}**")
277
  gr.Markdown("⚠️ **First use requires model download, please wait about 10-15 minutes**")
278
 
279
  with gr.Row():
280
  with gr.Column(scale=1):
281
  gr.Markdown("### 1. Configuration")
282
  prompt_input = gr.Textbox(label="Prompt", lines=3, placeholder="Enter your creative prompt...")
283
+ enable_recap_checkbox = gr.Checkbox(label="Enable Prompt Recap", value=True, info="Uses Qwen3-8B for prompt enhancement")
284
 
285
  with gr.Row():
286
  width_input = gr.Slider(label="Width", minimum=256, maximum=2048, value=832, step=64)
 
307
  generate_button.click(fn=generate_image_interface, inputs=inputs_list, outputs=outputs_list)
308
 
309
  if __name__ == "__main__":
310
+ demo.launch()