Spaces:

TheAIBoi
/

ImageGen

Running

App Files Files Community

ImageGen / app.py

TheAIBoi

Update app.py

8af5338 verified about 1 month ago

raw

history blame

12.6 kB

	import gradio as gr
	import numpy as np
	import random
	import spaces
	from diffusers import StableDiffusionXLPipeline, AutoencoderKL, ControlNetModel
	from diffusers.utils import load_image
	import torch
	from typing import Tuple
	from PIL import Image
	from controlnet_aux import OpenposeDetector
	import insightface
	import onnxruntime

	# --- Model setup -----------------------------------------------------------
	# Everything below runs once at import time and downloads weights from the
	# Hugging Face Hub on first use.

	# Prefer the GPU when one is visible; otherwise fall back to the CPU.
	device = "cuda" if torch.cuda.is_available() else "cpu"

	# Half precision is only used on CUDA. On CPU we load in float32, because
	# several operators lack float16 kernels there. This single value is applied
	# to every model so their dtypes can never disagree.
	torch_dtype = torch.float16 if device == "cuda" else torch.float32

	model_repo_id = "RunDiffusion/Juggernaut-XL-v9"  # Replace to the model you would like to use

	# Replacement SDXL VAE loaded alongside the base checkpoint.
	vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch_dtype)

	# Text-to-image pipeline. `custom_pipeline` selects the community
	# "long prompt weighting" SDXL implementation.
	pipe = StableDiffusionXLPipeline.from_pretrained(
	    model_repo_id,
	    vae=vae,
	    torch_dtype=torch_dtype,
	    custom_pipeline="lpw_stable_diffusion_xl",
	    use_safetensors=True,
	    add_watermarker=False,
	    variant="fp16",
	)
	pipe.to(device)

	# NOTE(review): confirm this repo id exists on the Hub. The `control_v11p_*`
	# naming is normally seen on SD 1.5 checkpoints, so an SDXL OpenPose
	# ControlNet may need a different repository.
	controlnet_openpose = ControlNetModel.from_pretrained(
	    "lllyasviel/control_v11p_sdxl_openpose", torch_dtype=torch_dtype
	).to(device)

	# `from_pretrained` expects a Hub *repo id*; the annotator checkpoint path
	# inside that repo is resolved by controlnet_aux itself, so it must not be
	# appended to the id.
	openpose_detector = OpenposeDetector.from_pretrained("lllyasviel/ControlNet").to(device)

	# --- IP-Adapter (face reference) ---------------------------------------------
	# Best effort: try the FaceID weights first, then the "plus-face" variant.
	# A failure here must not stop the app from starting, hence the broad
	# `except Exception` with the error printed to the log.
	try:
	    # NOTE(review): FaceID weights appear to be published in a separate repo
	    # and expect face embeddings rather than a raw image - confirm before
	    # relying on this first attempt succeeding.
	    pipe.load_ip_adapter("h94/IP-Adapter", subfolder="models", weight_name="ip-adapter-faceid_sdxl.bin")
	except Exception as e:
	    print(f"Could not load IP-Adapter FaceID. Make sure the model exists and paths are correct: {e}")
	    print("Trying a common alternative: ip-adapter-plus-face_sdxl_vit-h.safetensors")
	    try:
	        # The SDXL adapter weights live under `sdxl_models`; the ViT-H image
	        # encoder they were trained with is stored under `models/image_encoder`.
	        pipe.load_ip_adapter(
	            "h94/IP-Adapter",
	            subfolder="sdxl_models",
	            weight_name="ip-adapter-plus-face_sdxl_vit-h.safetensors",
	            image_encoder_folder="models/image_encoder",
	        )
	    except Exception as e2:
	        print(f"Could not load second IP-Adapter variant: {e2}")
	        print("IP-Adapter will not be available. Please check your IP-Adapter setup.")
	        # Make sure no half-initialised adapter state stays attached.
	        pipe.unload_ip_adapter()

	# Largest seed accepted by the UI (upper bound of a signed 32-bit integer).
	MAX_SEED = np.iinfo(np.int32).max
	# Upper bound for the width/height sliders, in pixels.
	MAX_IMAGE_SIZE = 4096

	# Keys of every style preset, in the order they are stored.
	_STYLE_FIELDS = ("name", "prompt", "negative_prompt")

	# One row per preset: (name, prompt template, negative prompt).
	# "{prompt}" inside the template marks where the user's text is inserted.
	_STYLE_ROWS = (
	    (
	        "(No style)",
	        "{prompt}",
	        "",
	    ),
	    (
	        "Cinematic",
	        "cinematic still {prompt} . emotional, harmonious, vignette, highly detailed, high budget, bokeh, cinemascope, moody, epic, gorgeous, film grain, grainy",
	        "anime, cartoon, graphic, text, painting, crayon, graphite, abstract, glitch, deformed, mutated, ugly, disfigured",
	    ),
	    (
	        "Photographic",
	        "cinematic photo {prompt} . 35mm photograph, film, bokeh, professional, 4k, highly detailed",
	        "drawing, painting, crayon, sketch, graphite, impressionist, noisy, blurry, soft, deformed, ugly",
	    ),
	    (
	        "Anime",
	        "anime artwork {prompt} . anime style, key visual, vibrant, studio anime, highly detailed",
	        "photo, deformed, black and white, realism, disfigured, low contrast",
	    ),
	    (
	        "Manga",
	        "manga style {prompt} . vibrant, high-energy, detailed, iconic, Japanese comic style",
	        "ugly, deformed, noisy, blurry, low contrast, realism, photorealistic, Western comic style",
	    ),
	    (
	        "Digital Art",
	        "concept art {prompt} . digital artwork, illustrative, painterly, matte painting, highly detailed",
	        "photo, photorealistic, realism, ugly",
	    ),
	    (
	        "Pixel art",
	        "pixel-art {prompt} . low-res, blocky, pixel art style, 8-bit graphics",
	        "sloppy, messy, blurry, noisy, highly detailed, ultra textured, photo, realistic",
	    ),
	    (
	        "Fantasy art",
	        "ethereal fantasy concept art of {prompt} . magnificent, celestial, ethereal, painterly, epic, majestic, magical, fantasy art, cover art, dreamy",
	        "photographic, realistic, realism, 35mm film, dslr, cropped, frame, text, deformed, glitch, noise, noisy, off-center, deformed, cross-eyed, closed eyes, bad anatomy, ugly, disfigured, sloppy, duplicate, mutated, black and white",
	    ),
	    (
	        "Neonpunk",
	        "neonpunk style {prompt} . cyberpunk, vaporwave, neon, vibes, vibrant, stunningly beautiful, crisp, detailed, sleek, ultramodern, magenta highlights, dark purple shadows, high contrast, cinematic, ultra detailed, intricate, professional",
	        "painting, drawing, illustration, glitch, deformed, mutated, cross-eyed, ugly, disfigured",
	    ),
	    (
	        "3D Model",
	        "professional 3d model {prompt} . octane render, highly detailed, volumetric, dramatic lighting",
	        "ugly, deformed, noisy, low poly, blurry, painting",
	    ),
	)

	# Presets as a list of dicts with the keys listed in _STYLE_FIELDS.
	style_list = [dict(zip(_STYLE_FIELDS, row)) for row in _STYLE_ROWS]

	# Lookup table: preset name -> (prompt template, negative prompt).
	styles = {}
	for _preset in style_list:
	    styles[_preset["name"]] = (_preset["prompt"], _preset["negative_prompt"])

	# Preset names in display order, and the one selected by default.
	STYLE_NAMES = [*styles]
	DEFAULT_STYLE_NAME = "(No style)"

	def apply_style(style_name: str, positive: str, negative: str = "") -> Tuple[str, str]:
	    """Expand a style preset around the user's prompts.

	    Args:
	        style_name: Key into ``styles``. Unknown names fall back to
	            ``DEFAULT_STYLE_NAME``.
	        positive: User prompt, substituted for ``{prompt}`` in the preset's
	            prompt template.
	        negative: Optional user negative prompt; ``None`` or an empty string
	            means "nothing extra".

	    Returns:
	        ``(prompt, negative_prompt)``. The negative prompt is the preset's
	        negative terms followed by the user's, separated by ``", "``.
	    """
	    template, style_negative = styles.get(style_name, styles[DEFAULT_STYLE_NAME])
	    user_negative = (negative or "").strip()
	    # Join with a separator so the last preset term and the first user term
	    # stay distinct; skip empty parts so no stray ", " is produced.
	    merged_negative = ", ".join(part for part in (style_negative, user_negative) if part)
	    return template.replace("{prompt}", positive), merged_negative

	# Pose-conditioned pipeline, built on first use by _get_controlnet_pipe().
	_controlnet_pipe = None


	def _ip_adapter_loaded():
	    """Return True when IP-Adapter weights are attached to ``pipe``'s UNet."""
	    # NOTE(review): relies on diffusers installing an image-projection module
	    # as `unet.encoder_hid_proj` when an IP-Adapter is loaded and clearing it
	    # on unload - confirm against the pinned diffusers version.
	    return getattr(pipe.unet, "encoder_hid_proj", None) is not None


	def _get_controlnet_pipe():
	    """Return a ControlNet pipeline that reuses the components of ``pipe``."""
	    global _controlnet_pipe
	    if _controlnet_pipe is None:
	        # Imported here so this block does not depend on the file-level
	        # import list being edited.
	        from diffusers import StableDiffusionXLControlNetPipeline

	        # A ControlNet is bound when the pipeline is constructed; it cannot be
	        # handed to the text-to-image pipeline as a per-call argument.
	        # `from_pipe` reuses the already-loaded modules of `pipe`.
	        _controlnet_pipe = StableDiffusionXLControlNetPipeline.from_pipe(
	            pipe, controlnet=controlnet_openpose
	        )
	    return _controlnet_pipe


	@spaces.GPU
	def infer(
	    prompt,
	    negative_prompt,
	    style,
	    input_image_pose,
	    pose_strength,
	    input_image_face,
	    face_fidelity,
	    seed,
	    randomize_seed,
	    width,
	    height,
	    guidance_scale,
	    num_inference_steps,
	    progress=gr.Progress(track_tqdm=True),
	):
	    """Generate one image, optionally guided by a pose and/or a face reference.

	    Args:
	        prompt: User prompt; the selected style preset is applied to it.
	        negative_prompt: User negative prompt (may be empty or None).
	        style: Name of the style preset passed to ``apply_style``.
	        input_image_pose: Pose reference image, or None for no pose control.
	        pose_strength: ControlNet conditioning scale for the pose reference.
	        input_image_face: Face reference image, or None.
	        face_fidelity: IP-Adapter scale applied when a face image is given.
	        seed: Seed for the random generator; replaced when ``randomize_seed``.
	        randomize_seed: Draw a fresh seed in ``[0, MAX_SEED]`` when true.
	        width: Output width in pixels.
	        height: Output height in pixels.
	        guidance_scale: Classifier-free guidance scale.
	        num_inference_steps: Number of denoising steps.
	        progress: Gradio progress tracker (mirrors tqdm progress bars).

	    Returns:
	        ``(image, seed)`` - the first generated image and the seed used.
	    """
	    if randomize_seed:
	        seed = random.randint(0, MAX_SEED)
	    # UI sliders may deliver floats; `manual_seed` requires an int.
	    seed = int(seed)

	    prompt, negative_prompt = apply_style(style, prompt, negative_prompt)
	    generator = torch.Generator().manual_seed(seed)

	    call_kwargs = {
	        "prompt": prompt,
	        "negative_prompt": negative_prompt,
	        "guidance_scale": guidance_scale,
	        "num_inference_steps": int(num_inference_steps),
	        "width": int(width),
	        "height": int(height),
	        "generator": generator,
	    }

	    # Without a pose reference this stays plain text-to-image.
	    active_pipe = pipe
	    if input_image_pose is not None:
	        # Switch to the ControlNet pipeline and feed it the detector's output
	        # for the reference image as the conditioning image.
	        active_pipe = _get_controlnet_pipe()
	        call_kwargs["image"] = openpose_detector(input_image_pose)
	        call_kwargs["controlnet_conditioning_scale"] = float(pose_strength)

	    if _ip_adapter_loaded():
	        if input_image_face is not None:
	            pipe.set_ip_adapter_scale(face_fidelity)
	            call_kwargs["ip_adapter_image"] = input_image_face
	        else:
	            # With an adapter attached the UNet still expects image
	            # embeddings, so pass a blank image and mute the adapter by
	            # setting its scale to zero.
	            pipe.set_ip_adapter_scale(0.0)
	            call_kwargs["ip_adapter_image"] = Image.new("RGB", (224, 224))
	    elif input_image_face is not None:
	        # No adapter available: tell the user instead of failing inside the
	        # pipeline on an image it cannot encode.
	        gr.Warning("Face reference ignored: the IP-Adapter could not be loaded.")

	    image = active_pipe(**call_kwargs).images[0]

	    return image, seed

	# Prompts offered as one-click examples underneath the form.
	examples = [
	    "A stunning woman standing on a beach at sunset, dramatic lighting, highly detailed",
	    "A man in a futuristic city, cyberpunk style, neon lights",
	    "An AI model posing with a friendly robot in a studio, professional photoshoot",
	]
	# Centres the main column and caps its width.
	css = """#col-container {
	margin: 0 auto;
	max-width: 640px;
	}"""

	# Range shared by the two reference-strength sliders.
	_unit_range = dict(minimum=0.0, maximum=1.0, step=0.01)
	# Range and default shared by the width and height sliders.
	_size_range = dict(minimum=256, maximum=MAX_IMAGE_SIZE, step=32, value=1024)

	with gr.Blocks(css=css) as demo:
	    with gr.Column(elem_id="col-container"):
	        gr.Markdown(" # AI Instagram Model Creator")

	        # Prompt box and the button that starts a generation.
	        with gr.Row():
	            prompt = gr.Text(
	                label="Prompt",
	                placeholder="Describe your AI model and scene (e.g., 'A confident woman in a red dress, city background')",
	                max_lines=1,
	                show_label=False,
	                container=False,
	            )
	            run_button = gr.Button("Generate", variant="primary", scale=0)

	        result = gr.Image(label="Result", show_label=False)

	        # Optional reference images for pose and face.
	        with gr.Accordion("Reference Images", open=True):
	            gr.Markdown("Upload images to control pose and face consistency.")
	            input_image_pose = gr.Image(
	                label="Human Pose Reference (for body posture)",
	                type="pil",
	                show_label=True,
	            )
	            pose_strength = gr.Slider(
	                label="Pose Control Strength (0.0 = ignore, 1.0 = strict adherence)",
	                value=0.8,
	                **_unit_range,
	            )
	            gr.Markdown("---")  # Visual separator between the two references

	            input_image_face = gr.Image(
	                label="Face Reference (for facial consistency)",
	                type="pil",
	                show_label=True,
	            )
	            face_fidelity = gr.Slider(
	                label="Face Fidelity (0.0 = ignore, 1.0 = highly similar)",
	                value=0.7,
	                **_unit_range,
	            )

	        # Style preset picker.
	        with gr.Row(visible=True):
	            style_selection = gr.Radio(
	                label="Image Style",
	                choices=STYLE_NAMES,
	                value=DEFAULT_STYLE_NAME,
	                interactive=True,
	                container=True,
	                show_label=True,
	            )

	        with gr.Accordion("Advanced Settings", open=False):
	            # Created hidden; its value is still passed to `infer`.
	            negative_prompt = gr.Text(
	                label="Negative prompt",
	                placeholder="What you DON'T want in the image (e.g., 'deformed, blurry, text')",
	                max_lines=1,
	                visible=False,
	            )
	            seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
	            randomize_seed = gr.Checkbox(label="Randomize seed", value=True)

	            with gr.Row():
	                width = gr.Slider(label="Width", **_size_range)
	                height = gr.Slider(label="Height", **_size_range)

	            with gr.Row():
	                guidance_scale = gr.Slider(
	                    label="Guidance scale",
	                    minimum=0.0,
	                    maximum=20.0,
	                    step=0.1,
	                    value=7.0,
	                )
	                num_inference_steps = gr.Slider(
	                    label="Number of inference steps",
	                    minimum=1,
	                    maximum=100,
	                    step=1,
	                    value=30,
	                )

	        gr.Examples(examples=examples, inputs=[prompt])

	    # Components in the positional order of `infer`'s parameters.
	    _infer_inputs = [
	        prompt,
	        negative_prompt,
	        style_selection,
	        input_image_pose,
	        pose_strength,
	        input_image_face,
	        face_fidelity,
	        seed,
	        randomize_seed,
	        width,
	        height,
	        guidance_scale,
	        num_inference_steps,
	    ]

	    # Clicking the button or submitting the prompt box both run `infer`;
	    # the seed that was used is written back to the seed slider.
	    gr.on(
	        triggers=[run_button.click, prompt.submit],
	        fn=infer,
	        inputs=_infer_inputs,
	        outputs=[result, seed],
	    )

	if __name__ == "__main__":
	    demo.launch(share=True)