Image Editing with Diffusion Models
GitHub: https://github.com/iamlucaconti/InstructPix2Pix
Given an input image and an accompanying edit instruction, our model generates the corresponding modification directly. Unlike approaches that rely on detailed textual descriptions of both the source and target images, our method requires only the instruction (edit_prompt) and performs the edit in a single forward pass, without per-example inversion or additional fine-tuning. An example generated by our model is shown below:

How to use our model
To edit an image using our model:
import os
import torch
import requests
import matplotlib.pyplot as plt
from PIL import Image, ImageOps
from io import BytesIO
from diffusers import StableDiffusionInstructPix2PixPipeline
def download_image(
    url: str,
    resize: bool = False,
    resolution: int = 512,
    timeout: float = 30.0,
) -> Image.Image:
    """Download an image and return it as an upright RGB PIL image.

    Args:
        url: HTTP(S) address of the image to fetch.
        resize: When true, scale the image so that its longer side equals
            ``resolution`` while keeping the aspect ratio.
        resolution: Target length, in pixels, of the longer side. Only used
            when ``resize`` is true.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded image with EXIF orientation applied, converted to RGB.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    # Without a timeout a stalled server would block the script forever.
    response = requests.get(url, timeout=timeout)
    # Fail on 4xx/5xx here instead of handing an error page to the decoder.
    response.raise_for_status()

    image = Image.open(BytesIO(response.content))
    # Fix orientation issues from EXIF metadata
    image = ImageOps.exif_transpose(image).convert("RGB")

    if not resize:
        return image

    w, h = image.size
    # The longer side becomes `resolution`; the shorter side is scaled by
    # the same factor. Clamp to 1 px because truncation could otherwise
    # yield a zero-sized side for very elongated images.
    if w > h:
        new_w = resolution
        new_h = max(1, int(h * resolution / w))
    else:
        new_h = resolution
        new_w = max(1, int(w * resolution / h))
    return image.resize((new_w, new_h))
# Parameters
pretrained_model_name_or_path = "iamlucaconti/instruct-pix2pix-model"  # Custom Pix2Pix model
image_url = "<URL OF IMAGE TO EDIT>"
prompt = "<YOUR EDIT PROMPT>"  # Instructional edit prompt
num_inference_steps = 50  # More steps = higher quality but slower
image_guidance_scale = 2.0  # Strength of adherence to input image
guidance_scale = 3.0  # Strength of adherence to text prompt
seed = 0  # Random seed (for reproducibility)
output_path = "output.png"  # File to save result

# Select device: use the GPU when one is available, otherwise fall back to
# the CPU so the example still runs (slowly). Half precision is only used on
# the GPU; the CPU path uses float32.
device = "cuda" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if device == "cuda" else torch.float32

# Load pipeline
pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained(
    pretrained_model_name_or_path,
    torch_dtype=torch_dtype,
    safety_checker=None,
).to(device)

# Load image
image = download_image(image_url, resize=True)

# Set seed (the generator is created on the same device as the pipeline)
generator = torch.Generator(device).manual_seed(seed)

# Generate the edited image
edited_image = pipe(
    prompt=prompt,
    image=image,
    num_inference_steps=num_inference_steps,
    image_guidance_scale=image_guidance_scale,
    guidance_scale=guidance_scale,
    generator=generator,
).images[0]

# Save
edited_image.save(output_path)
- Downloads last month
- 49