Spaces:

TheAIBoi
/

ImageGen

Running

App Files Community

TheAIBoi committed on Jun 13

Commit

8af5338

verified ·

1 Parent(s): b0ede5e

Update app.py

Browse files

Files changed (1) hide show

app.py +111 -79

app.py CHANGED Viewed

@@ -1,11 +1,15 @@
 import gradio as gr
 import numpy as np
 import random
-import spaces #[uncomment to use ZeroGPU]
-from diffusers import StableDiffusionXLPipeline, AutoencoderKL, StableDiffusionXLImg2ImgPipeline
 import torch
 from typing import Tuple
 device = "cuda" if torch.cuda.is_available() else "cpu"
 model_repo_id = "RunDiffusion/Juggernaut-XL-v9"  # Replace to the model you would like to use
@@ -27,16 +31,23 @@ pipe = StableDiffusionXLPipeline.from_pretrained(
     )
 pipe.to(device)
-pipe_img2img = StableDiffusionXLImg2ImgPipeline.from_pretrained(
-    "RunDiffusion/Juggernaut-XL-v9",
-    vae=vae,
-    torch_dtype=torch.float16,
-    custom_pipeline="lpw_stable_diffusion_xl",
-    use_safetensors=True,
-    add_watermarker=False,
-    variant="fp16",
-)
-pipe_img2img.to(device)
 MAX_SEED = np.iinfo(np.int32).max
 MAX_IMAGE_SIZE = 4096
@@ -104,19 +115,22 @@ def apply_style(style_name: str, positive: str, negative: str = "") -> Tuple[str
         negative = ""
     return p.replace("{prompt}", positive), n + negative
-@spaces.GPU #[uncomment to use ZeroGPU]
 def infer(
     prompt,
     negative_prompt,
     style,
     seed,
     randomize_seed,
     width,
     height,
     guidance_scale,
     num_inference_steps,
-    input_image=None,  # New parameter for input image
-    strength=0.8,  # New parameter for img2img strength
     progress=gr.Progress(track_tqdm=True),
 ):
     if randomize_seed:
@@ -124,71 +138,94 @@ def infer(
     prompt, negative_prompt = apply_style(style, prompt, negative_prompt)
     generator = torch.Generator().manual_seed(seed)
-    if input_image is not None:
-        # Use img2img pipeline if an image is provided
-        image = pipe_img2img(
-            prompt=prompt,
-            image=input_image,  # Pass the input image
-            strength=strength,   # Control how much the image is changed
-            negative_prompt=negative_prompt,
-            guidance_scale=guidance_scale,
-            num_inference_steps=num_inference_steps,
-            generator=generator,
-        ).images[0]
-    else:
-        # Use text2img pipeline otherwise
-        image = pipe(
-            prompt=prompt,
-            negative_prompt=negative_prompt,
-            guidance_scale=guidance_scale,
-            num_inference_steps=num_inference_steps,
-            width=width,
-            height=height,
-            generator=generator,
-        ).images[0]
-    return image, seed
 examples = [
-    "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
-    "An astronaut riding a green horse",
-    "A delicious ceviche cheesecake slice",
 ]
-css = """
-#col-container {
     margin: 0 auto;
     max-width: 640px;
-}
-"""
 with gr.Blocks(css=css) as demo:
     with gr.Column(elem_id="col-container"):
-        gr.Markdown(" # ImageGen, the fastest and most precise image generator")
         with gr.Row():
             prompt = gr.Text(
                 label="Prompt",
                 show_label=False,
                 max_lines=1,
-                placeholder="Enter your prompt",
                 container=False,
             )
-            run_button = gr.Button("Run", scale=0, variant="primary")
         result = gr.Image(label="Result", show_label=False)
-        # Add image input and strength slider
-        with gr.Row():
-            input_image = gr.Image(type="pil", label="Input Image (Optional)", show_label=True, height=200)
-            with gr.Column():
-                strength = gr.Slider(
-                    label="Image Strength",
-                    minimum=0.0,
-                    maximum=1.0,
-                    step=0.01,
-                    value=0.8, # Default strength for img2img
-                    visible=True, # Make it visible if you want it always there, or toggle visibility with JS
-                )
         with gr.Row(visible=True):
             style_selection = gr.Radio(
@@ -199,15 +236,13 @@ with gr.Blocks(css=css) as demo:
                 value=DEFAULT_STYLE_NAME,
                 label="Image Style",
             )
         with gr.Accordion("Advanced Settings", open=False):
             negative_prompt = gr.Text(
                 label="Negative prompt",
                 max_lines=1,
-                placeholder="Enter a negative prompt",
                 visible=False,
             )
             seed = gr.Slider(
                 label="Seed",
                 minimum=0,
@@ -215,44 +250,39 @@ with gr.Blocks(css=css) as demo:
                 step=1,
                 value=0,
             )
             randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
             with gr.Row():
                 width = gr.Slider(
                     label="Width",
                     minimum=256,
                     maximum=MAX_IMAGE_SIZE,
                     step=32,
-                    value=4096,  # Replace with defaults that work for your model
                 )
                 height = gr.Slider(
                     label="Height",
                     minimum=256,
                     maximum=MAX_IMAGE_SIZE,
                     step=32,
-                    value=4096,  # Replace with defaults that work for your model
                 )
             with gr.Row():
                 guidance_scale = gr.Slider(
                     label="Guidance scale",
                     minimum=0.0,
-                    maximum=10.0,
                     step=0.1,
-                    value=0.0,  # Replace with defaults that work for your model
                 )
                 num_inference_steps = gr.Slider(
                     label="Number of inference steps",
                     minimum=1,
-                    maximum=500,
                     step=1,
-                    value=500,  # Replace with defaults that work for your model
                 )
         gr.Examples(examples=examples, inputs=[prompt])
     gr.on(
         triggers=[run_button.click, prompt.submit],
         fn=infer,
@@ -260,14 +290,16 @@ with gr.Blocks(css=css) as demo:
             prompt,
             negative_prompt,
             style_selection,
             seed,
             randomize_seed,
             width,
             height,
             guidance_scale,
             num_inference_steps,
-            input_image,  # Add input_image to inputs
-            strength,     # Add strength to inputs
         ],
         outputs=[result, seed],
     )

 import gradio as gr
 import numpy as np
 import random
+import spaces
+from diffusers import StableDiffusionXLPipeline, AutoencoderKL, ControlNetModel
+from diffusers.utils import load_image
 import torch
 from typing import Tuple
+from PIL import Image
+from controlnet_aux import OpenposeDetector
+import insightface
+import onnxruntime
 device = "cuda" if torch.cuda.is_available() else "cpu"
 model_repo_id = "RunDiffusion/Juggernaut-XL-v9"  # Replace to the model you would like to use
     )
 pipe.to(device)
+controlnet_openpose = ControlNetModel.from_pretrained(
+    "lllyasviel/control_v11p_sdxl_openpose", torch_dtype=torch.float16
+).to(device)
+openpose_detector = OpenposeDetector.from_pretrained("lllyasviel/ControlNet/annotator/ckpts/body_pose_model.pth").to(device)
+try:
+    pipe.load_ip_adapter("h94/IP-Adapter", subfolder="models", weight_name="ip-adapter-faceid_sdxl.bin")
+except Exception as e:
+    print(f"Could not load IP-Adapter FaceID. Make sure the model exists and paths are correct: {e}")
+    print("Trying a common alternative: ip-adapter-plus-face_sdxl_vit-h.safetensors")
+    try:
+        pipe.load_ip_adapter("h94/IP-Adapter", subfolder="models", weight_name="ip-adapter-plus-face_sdxl_vit-h.safetensors")
+    except Exception as e2:
+        print(f"Could not load second IP-Adapter variant: {e2}")
+        print("IP-Adapter will not be available. Please check your IP-Adapter setup.")
+        pipe.unload_ip_adapter()
 MAX_SEED = np.iinfo(np.int32).max
 MAX_IMAGE_SIZE = 4096
         negative = ""
     return p.replace("{prompt}", positive), n + negative
+@spaces.GPU
 def infer(
     prompt,
     negative_prompt,
     style,
+    # Removed general img2img reference as we are specializing
+    input_image_pose, # New: for ControlNet OpenPose
+    pose_strength,    # New: strength for ControlNet
+    input_image_face, # New: for IP-Adapter Face
+    face_fidelity,    # New: fidelity/strength for IP-Adapter
     seed,
     randomize_seed,
     width,
     height,
     guidance_scale,
     num_inference_steps,
     progress=gr.Progress(track_tqdm=True),
 ):
     if randomize_seed:
     prompt, negative_prompt = apply_style(style, prompt, negative_prompt)
     generator = torch.Generator().manual_seed(seed)
+    # --- NEW: Prepare ControlNet and IP-Adapter inputs ---
+    controlnet_images = []
+    controlnet_conditioning_scales = []
+    controlnet_models_to_use = []
+    ip_adapter_image_embeddings = None # Will store the face embeddings
+    # Process Pose Reference
+    if input_image_pose:
+        # Preprocess the image to get the OpenPose skeleton
+        processed_pose_image = openpose_detector(input_image_pose)
+        controlnet_images.append(processed_pose_image)
+        controlnet_conditioning_scales.append(pose_strength)
+        controlnet_models_to_use.append(controlnet_openpose)
+    # Process Face Reference (IP-Adapter)
+    if input_image_face and pipe.has_lora_weights("ip_adapter"): # Check if IP-Adapter was loaded successfully
+        # For IP-Adapter FaceID, the pipeline itself usually handles embedding extraction
+        # You just pass the image directly.
+        # The scale is set before the call.
+        pipe.set_ip_adapter_scale(face_fidelity)
+        # ip_adapter_image_embeddings = pipe.encode_ip_adapter_image(input_image_face) # If you need to manually encode
+        # Often, you just pass the image to the main call directly if IP-Adapter is loaded.
+    # --- END NEW INPUT PREPARATION ---
+    # Adjusting the pipe call to use ControlNet and IP-Adapter
+    # Note: If no reference images are provided, it will fall back to text-to-image.
+    image = pipe(
+        prompt=prompt,
+        negative_prompt=negative_prompt,
+        image=controlnet_images if controlnet_images else None, # Pass processed pose image(s) if available
+        controlnet_conditioning_scale=controlnet_conditioning_scales if controlnet_conditioning_scales else None,
+        controlnet=controlnet_models_to_use if controlnet_models_to_use else None,
+        ip_adapter_image=input_image_face if input_image_face else None, # Pass the raw face image for IP-Adapter
+        # ip_adapter_image_embeds=ip_adapter_image_embeddings, # Use this if you pre-encode
+        guidance_scale=guidance_scale,
+        num_inference_steps=num_inference_steps,
+        width=width,
+        height=height,
+        generator=generator,
+    ).images[0]
+    return image, seed
 examples = [
+    "A stunning woman standing on a beach at sunset, dramatic lighting, highly detailed",
+    "A man in a futuristic city, cyberpunk style, neon lights",
+    "An AI model posing with a friendly robot in a studio, professional photoshoot",
 ]
+css = """#col-container {
     margin: 0 auto;
     max-width: 640px;
+}"""
 with gr.Blocks(css=css) as demo:
     with gr.Column(elem_id="col-container"):
+        gr.Markdown(" # AI Instagram Model Creator")
         with gr.Row():
             prompt = gr.Text(
                 label="Prompt",
                 show_label=False,
                 max_lines=1,
+                placeholder="Describe your AI model and scene (e.g., 'A confident woman in a red dress, city background')",
                 container=False,
             )
+            run_button = gr.Button("Generate", scale=0, variant="primary")
         result = gr.Image(label="Result", show_label=False)
+        with gr.Accordion("Reference Images", open=True):
+            gr.Markdown("Upload images to control pose and face consistency.")
+            input_image_pose = gr.Image(label="Human Pose Reference (for body posture)", type="pil", show_label=True)
+            pose_strength = gr.Slider(
+                label="Pose Control Strength (0.0 = ignore, 1.0 = strict adherence)",
+                minimum=0.0,
+                maximum=1.0,
+                step=0.01,
+                value=0.8, # Good starting point for strong pose control
+            )
+            gr.Markdown("---") # Separator
+            input_image_face = gr.Image(label="Face Reference (for facial consistency)", type="pil", show_label=True)
+            face_fidelity = gr.Slider(
+                label="Face Fidelity (0.0 = ignore, 1.0 = highly similar)",
+                minimum=0.0,
+                maximum=1.0,
+                step=0.01,
+                value=0.7, # Good starting point for face transfer
+            )
         with gr.Row(visible=True):
             style_selection = gr.Radio(
                 value=DEFAULT_STYLE_NAME,
                 label="Image Style",
             )
         with gr.Accordion("Advanced Settings", open=False):
             negative_prompt = gr.Text(
                 label="Negative prompt",
                 max_lines=1,
+                placeholder="What you DON'T want in the image (e.g., 'deformed, blurry, text')",
                 visible=False,
             )
             seed = gr.Slider(
                 label="Seed",
                 minimum=0,
                 step=1,
                 value=0,
             )
             randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
             with gr.Row():
                 width = gr.Slider(
                     label="Width",
                     minimum=256,
                     maximum=MAX_IMAGE_SIZE,
                     step=32,
+                    value=1024,
                 )
                 height = gr.Slider(
                     label="Height",
                     minimum=256,
                     maximum=MAX_IMAGE_SIZE,
                     step=32,
+                    value=1024,
                 )
             with gr.Row():
                 guidance_scale = gr.Slider(
                     label="Guidance scale",
                     minimum=0.0,
+                    maximum=20.0, # Increased max for more control
                     step=0.1,
+                    value=7.0,
                 )
                 num_inference_steps = gr.Slider(
                     label="Number of inference steps",
                     minimum=1,
+                    maximum=100, # More typical steps for SDXL (20-50 usually sufficient)
                     step=1,
+                    value=30,
                 )
         gr.Examples(examples=examples, inputs=[prompt])
     gr.on(
         triggers=[run_button.click, prompt.submit],
         fn=infer,
             prompt,
             negative_prompt,
             style_selection,
+            input_image_pose,
+            pose_strength,
+            input_image_face,
+            face_fidelity,
             seed,
             randomize_seed,
             width,
             height,
             guidance_scale,
             num_inference_steps,
         ],
         outputs=[result, seed],
     )