TheAIBoi committed
Commit 8af5338 · verified · 1 Parent(s): b0ede5e

Update app.py

Files changed (1)
  1. app.py +111 -79
app.py CHANGED
@@ -1,11 +1,15 @@
 import gradio as gr
 import numpy as np
 import random
-
-import spaces #[uncomment to use ZeroGPU]
-from diffusers import StableDiffusionXLPipeline, AutoencoderKL, StableDiffusionXLImg2ImgPipeline
 import torch
 from typing import Tuple

 device = "cuda" if torch.cuda.is_available() else "cpu"
 model_repo_id = "RunDiffusion/Juggernaut-XL-v9" # Replace to the model you would like to use
@@ -27,16 +31,23 @@ pipe = StableDiffusionXLPipeline.from_pretrained(
 )
 pipe.to(device)

-pipe_img2img = StableDiffusionXLImg2ImgPipeline.from_pretrained(
-    "RunDiffusion/Juggernaut-XL-v9",
-    vae=vae,
-    torch_dtype=torch.float16,
-    custom_pipeline="lpw_stable_diffusion_xl",
-    use_safetensors=True,
-    add_watermarker=False,
-    variant="fp16",
-)
-pipe_img2img.to(device)

 MAX_SEED = np.iinfo(np.int32).max
 MAX_IMAGE_SIZE = 4096
@@ -104,19 +115,22 @@ def apply_style(style_name: str, positive: str, negative: str = "") -> Tuple[str
         negative = ""
     return p.replace("{prompt}", positive), n + negative

-@spaces.GPU #[uncomment to use ZeroGPU]
 def infer(
     prompt,
     negative_prompt,
     style,
     seed,
     randomize_seed,
     width,
     height,
     guidance_scale,
     num_inference_steps,
-    input_image=None, # New parameter for input image
-    strength=0.8, # New parameter for img2img strength
     progress=gr.Progress(track_tqdm=True),
 ):
     if randomize_seed:
@@ -124,71 +138,94 @@ def infer(
     prompt, negative_prompt = apply_style(style, prompt, negative_prompt)
     generator = torch.Generator().manual_seed(seed)

-    if input_image is not None:
-        # Use img2img pipeline if an image is provided
-        image = pipe_img2img(
-            prompt=prompt,
-            image=input_image, # Pass the input image
-            strength=strength, # Control how much the image is changed
-            negative_prompt=negative_prompt,
-            guidance_scale=guidance_scale,
-            num_inference_steps=num_inference_steps,
-            generator=generator,
-        ).images[0]
-    else:
-        # Use text2img pipeline otherwise
-        image = pipe(
-            prompt=prompt,
-            negative_prompt=negative_prompt,
-            guidance_scale=guidance_scale,
-            num_inference_steps=num_inference_steps,
-            width=width,
-            height=height,
-            generator=generator,
-        ).images[0]

-    return image, seed


 examples = [
-    "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
-    "An astronaut riding a green horse",
-    "A delicious ceviche cheesecake slice",
 ]
-
-css = """
-#col-container {
     margin: 0 auto;
     max-width: 640px;
-}
-"""

 with gr.Blocks(css=css) as demo:
     with gr.Column(elem_id="col-container"):
-        gr.Markdown(" # ImageGen, the fastest and most precise image generator")
         with gr.Row():
             prompt = gr.Text(
                 label="Prompt",
                 show_label=False,
                 max_lines=1,
-                placeholder="Enter your prompt",
                 container=False,
             )
-            run_button = gr.Button("Run", scale=0, variant="primary")
         result = gr.Image(label="Result", show_label=False)

-        # Add image input and strength slider
-        with gr.Row():
-            input_image = gr.Image(type="pil", label="Input Image (Optional)", show_label=True, height=200)
-            with gr.Column():
-                strength = gr.Slider(
-                    label="Image Strength",
-                    minimum=0.0,
-                    maximum=1.0,
-                    step=0.01,
-                    value=0.8, # Default strength for img2img
-                    visible=True, # Make it visible if you want it always there, or toggle visibility with JS
-                )

         with gr.Row(visible=True):
             style_selection = gr.Radio(
@@ -199,15 +236,13 @@ with gr.Blocks(css=css) as demo:
                 value=DEFAULT_STYLE_NAME,
                 label="Image Style",
             )
-
         with gr.Accordion("Advanced Settings", open=False):
             negative_prompt = gr.Text(
                 label="Negative prompt",
                 max_lines=1,
-                placeholder="Enter a negative prompt",
                 visible=False,
             )
-
             seed = gr.Slider(
                 label="Seed",
                 minimum=0,
@@ -215,44 +250,39 @@ with gr.Blocks(css=css) as demo:
                 step=1,
                 value=0,
             )
-
             randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
-
             with gr.Row():
                 width = gr.Slider(
                     label="Width",
                     minimum=256,
                     maximum=MAX_IMAGE_SIZE,
                     step=32,
-                    value=4096, # Replace with defaults that work for your model
                 )
-
                 height = gr.Slider(
                     label="Height",
                     minimum=256,
                     maximum=MAX_IMAGE_SIZE,
                     step=32,
-                    value=4096, # Replace with defaults that work for your model
                 )
-
             with gr.Row():
                 guidance_scale = gr.Slider(
                     label="Guidance scale",
                     minimum=0.0,
-                    maximum=10.0,
                     step=0.1,
-                    value=0.0, # Replace with defaults that work for your model
                 )
-
                 num_inference_steps = gr.Slider(
                     label="Number of inference steps",
                     minimum=1,
-                    maximum=500,
                     step=1,
-                    value=500, # Replace with defaults that work for your model
                 )
-
         gr.Examples(examples=examples, inputs=[prompt])
     gr.on(
         triggers=[run_button.click, prompt.submit],
         fn=infer,
@@ -260,14 +290,16 @@ with gr.Blocks(css=css) as demo:
             prompt,
             negative_prompt,
             style_selection,
             seed,
             randomize_seed,
             width,
             height,
             guidance_scale,
             num_inference_steps,
-            input_image, # Add input_image to inputs
-            strength, # Add strength to inputs
         ],
         outputs=[result, seed],
     )
 
 import gradio as gr
 import numpy as np
 import random
+import spaces
+from diffusers import StableDiffusionXLPipeline, AutoencoderKL, ControlNetModel
+from diffusers.utils import load_image
 import torch
 from typing import Tuple
+from PIL import Image
+from controlnet_aux import OpenposeDetector
+import insightface
+import onnxruntime

 device = "cuda" if torch.cuda.is_available() else "cpu"
 model_repo_id = "RunDiffusion/Juggernaut-XL-v9" # Replace to the model you would like to use
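
The new imports pull in `insightface` and `onnxruntime` even though nothing later in this version of `app.py` calls them; they are the libraries typically used to compute the identity embedding that the FaceID family of IP-Adapters conditions on. A minimal sketch of that step, assuming the commonly used `buffalo_l` detector/recognizer pack (none of this is wired up by the commit):

```python
# Sketch only (not part of this commit): extracting a FaceID embedding with insightface.
# The "buffalo_l" model pack and the CPU provider are assumptions.
import numpy as np
from insightface.app import FaceAnalysis
from PIL import Image

face_app = FaceAnalysis(name="buffalo_l", providers=["CPUExecutionProvider"])
face_app.prepare(ctx_id=0, det_size=(640, 640))

def get_face_embedding(image: Image.Image):
    """Return the normalized embedding of the largest detected face, or None."""
    bgr = np.asarray(image.convert("RGB"))[:, :, ::-1].copy()  # insightface expects BGR arrays
    faces = face_app.get(bgr)
    if not faces:
        return None
    largest = max(faces, key=lambda f: (f.bbox[2] - f.bbox[0]) * (f.bbox[3] - f.bbox[1]))
    return largest.normed_embedding
```

If the plain image-based IP-Adapter path used further down is kept instead, these two imports (and the embedding step) can simply be dropped.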
 
 )
 pipe.to(device)

+controlnet_openpose = ControlNetModel.from_pretrained(
+    "lllyasviel/control_v11p_sdxl_openpose", torch_dtype=torch.float16
+).to(device)
+
+openpose_detector = OpenposeDetector.from_pretrained("lllyasviel/ControlNet/annotator/ckpts/body_pose_model.pth").to(device)
+
+try:
+    pipe.load_ip_adapter("h94/IP-Adapter", subfolder="models", weight_name="ip-adapter-faceid_sdxl.bin")
+except Exception as e:
+    print(f"Could not load IP-Adapter FaceID. Make sure the model exists and paths are correct: {e}")
+    print("Trying a common alternative: ip-adapter-plus-face_sdxl_vit-h.safetensors")
+    try:
+        pipe.load_ip_adapter("h94/IP-Adapter", subfolder="models", weight_name="ip-adapter-plus-face_sdxl_vit-h.safetensors")
+    except Exception as e2:
+        print(f"Could not load second IP-Adapter variant: {e2}")
+        print("IP-Adapter will not be available. Please check your IP-Adapter setup.")
+        pipe.unload_ip_adapter()

 MAX_SEED = np.iinfo(np.int32).max
 MAX_IMAGE_SIZE = 4096
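
Two assumptions in this block are worth flagging before relying on it. First, in diffusers a ControlNet only takes effect when it is attached to a ControlNet-aware pipeline such as `StableDiffusionXLControlNetPipeline` at construction time; the plain `StableDiffusionXLPipeline` built earlier in the file has no `controlnet` call argument. Second, the `lllyasviel/control_v11p_*` checkpoints target SD 1.5, so an SDXL-specific OpenPose checkpoint is needed, and the FaceID weights referenced in the `try` block are published in the separate `h94/IP-Adapter-FaceID` repository, while the SDXL "plus-face" weights sit under the `sdxl_models` subfolder of `h94/IP-Adapter` rather than `models`. A hedged sketch of the usual setup; the checkpoint ids and folder names below are placeholders, not something this commit pins:

```python
# Sketch under assumptions: the SDXL OpenPose ControlNet id, the Annotators repo for the
# pose detector, and the IP-Adapter weight locations are all assumed, not taken from this commit.
import torch
from controlnet_aux import OpenposeDetector
from diffusers import ControlNetModel, StableDiffusionXLControlNetPipeline

controlnet = ControlNetModel.from_pretrained(
    "thibaud/controlnet-openpose-sdxl-1.0",  # assumed SDXL OpenPose checkpoint
    torch_dtype=torch.float16,
)

# The ControlNet has to be passed when the pipeline is constructed.
pipe_cn = StableDiffusionXLControlNetPipeline.from_pretrained(
    "RunDiffusion/Juggernaut-XL-v9",
    controlnet=controlnet,
    torch_dtype=torch.float16,
    use_safetensors=True,
    add_watermarker=False,
    variant="fp16",
).to("cuda")

# controlnet_aux hosts the OpenPose detector weights on the Hub; no local .pth path is needed.
openpose_detector = OpenposeDetector.from_pretrained("lllyasviel/Annotators")

# Track adapter availability with a plain flag so the inference code can branch on it.
ip_adapter_loaded = False
try:
    pipe_cn.load_ip_adapter(
        "h94/IP-Adapter",
        subfolder="sdxl_models",
        weight_name="ip-adapter-plus-face_sdxl_vit-h.safetensors",
        image_encoder_folder="models/image_encoder",  # assumed location of the ViT-H encoder in that repo
    )
    ip_adapter_loaded = True
except Exception as err:
    print(f"IP-Adapter not available: {err}")
```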
 
         negative = ""
     return p.replace("{prompt}", positive), n + negative

+@spaces.GPU
 def infer(
     prompt,
     negative_prompt,
     style,
+    # Removed general img2img reference as we are specializing
+    input_image_pose, # New: for ControlNet OpenPose
+    pose_strength, # New: strength for ControlNet
+    input_image_face, # New: for IP-Adapter Face
+    face_fidelity, # New: fidelity/strength for IP-Adapter
     seed,
     randomize_seed,
     width,
     height,
     guidance_scale,
     num_inference_steps,
     progress=gr.Progress(track_tqdm=True),
 ):
     if randomize_seed:

     prompt, negative_prompt = apply_style(style, prompt, negative_prompt)
     generator = torch.Generator().manual_seed(seed)

+    # --- NEW: Prepare ControlNet and IP-Adapter inputs ---
+    controlnet_images = []
+    controlnet_conditioning_scales = []
+    controlnet_models_to_use = []
+    ip_adapter_image_embeddings = None # Will store the face embeddings
+
+    # Process Pose Reference
+    if input_image_pose:
+        # Preprocess the image to get the OpenPose skeleton
+        processed_pose_image = openpose_detector(input_image_pose)
+        controlnet_images.append(processed_pose_image)
+        controlnet_conditioning_scales.append(pose_strength)
+        controlnet_models_to_use.append(controlnet_openpose)
+
+    # Process Face Reference (IP-Adapter)
+    if input_image_face and pipe.has_lora_weights("ip_adapter"): # Check if IP-Adapter was loaded successfully
+        # For IP-Adapter FaceID, the pipeline itself usually handles embedding extraction
+        # You just pass the image directly.
+        # The scale is set before the call.
+        pipe.set_ip_adapter_scale(face_fidelity)
+        # ip_adapter_image_embeddings = pipe.encode_ip_adapter_image(input_image_face) # If you need to manually encode
+        # Often, you just pass the image to the main call directly if IP-Adapter is loaded.
+
+    # --- END NEW INPUT PREPARATION ---

+    # Adjusting the pipe call to use ControlNet and IP-Adapter
+    # Note: If no reference images are provided, it will fall back to text-to-image.
+    image = pipe(
+        prompt=prompt,
+        negative_prompt=negative_prompt,
+        image=controlnet_images if controlnet_images else None, # Pass processed pose image(s) if available
+        controlnet_conditioning_scale=controlnet_conditioning_scales if controlnet_conditioning_scales else None,
+        controlnet=controlnet_models_to_use if controlnet_models_to_use else None,
+        ip_adapter_image=input_image_face if input_image_face else None, # Pass the raw face image for IP-Adapter
+        # ip_adapter_image_embeds=ip_adapter_image_embeddings, # Use this if you pre-encode
+        guidance_scale=guidance_scale,
+        num_inference_steps=num_inference_steps,
+        width=width,
+        height=height,
+        generator=generator,
+    ).images[0]
+
+    return image, seed
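
At call time, two details of the hunk above are unlikely to behave as intended: `pipe.has_lora_weights("ip_adapter")` is not a diffusers pipeline method, and `controlnet=` / `controlnet_conditioning_scale=` passed to a plain `StableDiffusionXLPipeline` call will be rejected as unexpected keyword arguments. A hedged sketch of how the branching is usually wired, reusing the assumed `pipe_cn`, `openpose_detector` and `ip_adapter_loaded` names from the setup sketch above:

```python
# Sketch only: call-time wiring under the assumptions of the setup sketch.
common = dict(
    prompt=prompt,
    negative_prompt=negative_prompt,
    guidance_scale=guidance_scale,
    num_inference_steps=num_inference_steps,
    width=width,
    height=height,
    generator=generator,
)

if input_image_face is not None and ip_adapter_loaded:
    pipe_cn.set_ip_adapter_scale(face_fidelity)  # 0.0 effectively ignores the face reference
    common["ip_adapter_image"] = input_image_face

if input_image_pose is not None:
    pose_map = openpose_detector(input_image_pose)  # render the OpenPose skeleton image
    image = pipe_cn(
        image=pose_map,  # ControlNet conditioning image
        controlnet_conditioning_scale=float(pose_strength),
        **common,
    ).images[0]
else:
    # A ControlNet pipeline requires a conditioning image, so plain text-to-image falls back
    # to the base pipeline; drop the IP-Adapter kwarg unless the adapter was loaded there too.
    common.pop("ip_adapter_image", None)
    image = pipe(**common).images[0]
```

With this split, `pose_strength` maps onto `controlnet_conditioning_scale` and `face_fidelity` onto the IP-Adapter scale, which is what the two sliders added to the UI below feed in.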
 
 examples = [
+    "A stunning woman standing on a beach at sunset, dramatic lighting, highly detailed",
+    "A man in a futuristic city, cyberpunk style, neon lights",
+    "An AI model posing with a friendly robot in a studio, professional photoshoot",
 ]
+css = """#col-container {
     margin: 0 auto;
     max-width: 640px;
+}"""

 with gr.Blocks(css=css) as demo:
     with gr.Column(elem_id="col-container"):
+        gr.Markdown(" # AI Instagram Model Creator")
         with gr.Row():
             prompt = gr.Text(
                 label="Prompt",
                 show_label=False,
                 max_lines=1,
+                placeholder="Describe your AI model and scene (e.g., 'A confident woman in a red dress, city background')",
                 container=False,
             )
+            run_button = gr.Button("Generate", scale=0, variant="primary")
         result = gr.Image(label="Result", show_label=False)

+        with gr.Accordion("Reference Images", open=True):
+            gr.Markdown("Upload images to control pose and face consistency.")
+            input_image_pose = gr.Image(label="Human Pose Reference (for body posture)", type="pil", show_label=True)
+            pose_strength = gr.Slider(
+                label="Pose Control Strength (0.0 = ignore, 1.0 = strict adherence)",
+                minimum=0.0,
+                maximum=1.0,
+                step=0.01,
+                value=0.8, # Good starting point for strong pose control
+            )
+            gr.Markdown("---") # Separator
+
+            input_image_face = gr.Image(label="Face Reference (for facial consistency)", type="pil", show_label=True)
+            face_fidelity = gr.Slider(
+                label="Face Fidelity (0.0 = ignore, 1.0 = highly similar)",
+                minimum=0.0,
+                maximum=1.0,
+                step=0.01,
+                value=0.7, # Good starting point for face transfer
+            )
 
         with gr.Row(visible=True):
             style_selection = gr.Radio(

                 value=DEFAULT_STYLE_NAME,
                 label="Image Style",
             )
         with gr.Accordion("Advanced Settings", open=False):
             negative_prompt = gr.Text(
                 label="Negative prompt",
                 max_lines=1,
+                placeholder="What you DON'T want in the image (e.g., 'deformed, blurry, text')",
                 visible=False,
             )
             seed = gr.Slider(
                 label="Seed",
                 minimum=0,

                 step=1,
                 value=0,
             )
             randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
             with gr.Row():
                 width = gr.Slider(
                     label="Width",
                     minimum=256,
                     maximum=MAX_IMAGE_SIZE,
                     step=32,
+                    value=1024,
                 )
                 height = gr.Slider(
                     label="Height",
                     minimum=256,
                     maximum=MAX_IMAGE_SIZE,
                     step=32,
+                    value=1024,
                 )
             with gr.Row():
                 guidance_scale = gr.Slider(
                     label="Guidance scale",
                     minimum=0.0,
+                    maximum=20.0, # Increased max for more control
                     step=0.1,
+                    value=7.0,
                 )
                 num_inference_steps = gr.Slider(
                     label="Number of inference steps",
                     minimum=1,
+                    maximum=100, # More typical steps for SDXL (20-50 usually sufficient)
                     step=1,
+                    value=30,
                 )
         gr.Examples(examples=examples, inputs=[prompt])
+
     gr.on(
         triggers=[run_button.click, prompt.submit],
         fn=infer,

             prompt,
             negative_prompt,
             style_selection,
+            input_image_pose,
+            pose_strength,
+            input_image_face,
+            face_fidelity,
             seed,
             randomize_seed,
             width,
             height,
             guidance_scale,
             num_inference_steps,
         ],
         outputs=[result, seed],
     )