import gradio as gr
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel
from transformers import DPTFeatureExtractor, DPTForDepthEstimation
import torch
from PIL import Image
import numpy as np
import cv2
# Detect GPU or fallback to CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
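# Optional (assumption, not in the original code): half precision is much faster
# and lighter on CUDA GPUs; you could pick the dtype here and pass it as
# torch_dtype to both from_pretrained calls below.
# dtype = torch.float16 if device.type == "cuda" else torch.float32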
# Load ControlNet model (depth conditioning)
controlnet = ControlNetModel.from_pretrained(
    "lllyasviel/sd-controlnet-depth", torch_dtype=torch.float32
).to(device)
# Load Stable Diffusion with ControlNet
pipe = StableDiffusionControlNetPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    controlnet=controlnet,
    torch_dtype=torch.float32
).to(device)
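# Optional (not part of the original code): attention slicing trades a little
# speed for a lower peak memory footprint, which helps on small GPUs or
# CPU-only Spaces. Uncomment if you hit out-of-memory errors.
# pipe.enable_attention_slicing()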
# Load depth estimation model
depth_model = DPTForDepthEstimation.from_pretrained("Intel/dpt-hybrid-midas").to(device)
depth_processor = DPTFeatureExtractor.from_pretrained("Intel/dpt-hybrid-midas")
def generate(input_image, prompt):
    # Convert the uploaded image to RGB
    image = input_image.convert("RGB")
    # Prepare inputs for the depth estimator
    inputs = depth_processor(images=image, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = depth_model(**inputs)
        depth = outputs.predicted_depth.squeeze().cpu().numpy()
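    # Optional (assumption, not in the original code): DPT predicts depth at a
    # lower resolution; you could upsample it to the input size before
    # normalizing. The pipeline resizes the control image anyway, so this step
    # is not strictly required.
    # depth = torch.nn.functional.interpolate(
    #     outputs.predicted_depth.unsqueeze(1),
    #     size=image.size[::-1],  # PIL size is (W, H); interpolate wants (H, W)
    #     mode="bicubic",
    #     align_corners=False,
    # ).squeeze().cpu().numpy()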
    # Normalize to 0–255 and build the control image
    depth = cv2.normalize(depth, None, 0, 255, norm_type=cv2.NORM_MINMAX)
    # ControlNet expects a 3-channel control image, so convert the depth map to RGB
    depth_image = Image.fromarray(depth.astype(np.uint8)).convert("RGB")
    # Run image generation conditioned on the depth map
    result = pipe(
        prompt=prompt,
        image=depth_image,
        height=512,  # You can reduce this if slow
        width=512,
        num_inference_steps=10
    ).images[0]
    return result
# Gradio interface
gr.Interface(
    fn=generate,
    inputs=[
        gr.Image(type="pil", label="Upload Room Image"),
        gr.Textbox(label="Enter Interior Style Prompt", placeholder="e.g. modern Japanese living room"),
    ],
    outputs=gr.Image(type="pil", label="Generated Room"),
    title="🛋️ AI Room Redesign (ControlNet + Depth)",
    description="Upload a room image and get a redesigned version based on your style prompt using ControlNet Depth.",
).launch()