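"""AI Room Redesign demo: Stable Diffusion v1.5 + ControlNet depth conditioning.

A depth map is estimated from an uploaded room photo with Intel's DPT-Hybrid
(MiDaS) model and used as the ControlNet conditioning image, so the generated
redesign keeps the room's layout while restyling it to match the text prompt.
"""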
import gradio as gr
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel
from transformers import DPTImageProcessor, DPTForDepthEstimation
import torch
from PIL import Image
import numpy as np
import cv2

# Detect GPU or fallback to CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
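# All models below are moved to the same device; splitting them across devices
# would cause tensor-device mismatches at inference time.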

# Load ControlNet model (depth conditioning)
controlnet = ControlNetModel.from_pretrained(
    "lllyasviel/sd-controlnet-depth", torch_dtype=torch.float32
).to(device)
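# Note: float32 keeps this runnable on CPU. On a CUDA GPU, passing
# torch_dtype=torch.float16 to both from_pretrained calls cuts memory and latency.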

# Load Stable Diffusion with ControlNet
pipe = StableDiffusionControlNetPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    controlnet=controlnet,
    torch_dtype=torch.float32
).to(device)
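# Optional on smaller GPUs (standard diffusers API):
# pipe.enable_attention_slicing()  # lowers peak VRAM at a small speed cost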

# Load depth estimation model
depth_model = DPTForDepthEstimation.from_pretrained("Intel/dpt-hybrid-midas").to(device)
depth_processor = DPTImageProcessor.from_pretrained("Intel/dpt-hybrid-midas")
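# DPT-Hybrid predicts *relative* monocular depth; relative structure is all
# ControlNet needs to preserve the room's geometry.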

def generate(input_image, prompt):
    if input_image is None:
        raise gr.Error("Please upload a room image first.")

    # Convert image to RGB
    image = input_image.convert("RGB")

    # Prepare depth inputs
    inputs = depth_processor(images=image, return_tensors="pt").to(device)

    with torch.no_grad():
        outputs = depth_model(**inputs)
        depth = outputs.predicted_depth.squeeze().cpu().numpy()

    # Min-max normalize the relative depth to 0-255 and stack it to three
    # channels, matching the RGB-shaped conditioning image ControlNet expects
    depth = cv2.normalize(depth, None, 0, 255, norm_type=cv2.NORM_MINMAX)
    depth = np.stack([depth.astype(np.uint8)] * 3, axis=-1)
    depth_image = Image.fromarray(depth)

    # Run image generation; the pipeline resizes the conditioning image to height x width
    result = pipe(
        prompt=prompt,
        image=depth_image,
        height=512,   # lower to 384 (multiple of 8) if generation is slow
        width=512,
        num_inference_steps=10,   # fast preview; 20-30 steps give cleaner results
    ).images[0]

    return result

# Gradio interface
gr.Interface(
    fn=generate,
    inputs=[
        gr.Image(type="pil", label="Upload Room Image"),
        gr.Textbox(label="Enter Interior Style Prompt", placeholder="e.g. modern Japanese living room"),
    ],
    outputs=gr.Image(type="pil", label="Generated Room"),
    title="πŸ›‹οΈ AI Room Redesign (ControlNet + Depth)",
    description="Upload a room image and get a redesigned version based on your style prompt using ControlNet Depth.",
).launch()
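
# To run this demo (assuming the file is saved as app.py):
#   pip install gradio diffusers transformers torch pillow numpy opencv-python
#   python app.py
# Gradio serves the UI at http://127.0.0.1:7860 by default.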