File size: 1,483 Bytes
b94a16e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
import os
import requests
from PIL import Image
import torch
from torchvision import transforms
from transformers import (
VisionEncoderDecoderModel,
ViTImageProcessor,
AutoTokenizer,
BlipProcessor,
BlipForConditionalGeneration,
)
from diffusers import (
DiffusionPipeline,
StableDiffusionPipeline,
StableDiffusionImageVariationPipeline,
)
def generate_image_caption(image_path, output_path="img1.jpg"):
    """Generate a Stable Diffusion *image variation* of ``image_path``.

    NOTE(review): despite the name, this does not produce a text caption —
    it runs lambdalabs/sd-image-variations-diffusers to synthesize a new
    image resembling the input. The name is kept so existing callers work.

    Args:
        image_path: Path to the source image file.
        output_path: File the generated variation is written to
            (default ``"img1.jpg"``, matching the original behavior).
    """
    # Force CPU inference. (Removed the leftover CUDA_LAUNCH_BLOCKING env
    # setting — it is a CUDA debugging flag and meaningless on CPU.)
    device = torch.device("cpu")
    sd_pipe = StableDiffusionImageVariationPipeline.from_pretrained(
        "lambdalabs/sd-image-variations-diffusers", revision="v2.0"
    )
    sd_pipe = sd_pipe.to(device)
    # Preprocessing prescribed by the model card: to tensor, 224x224 bicubic
    # resize with antialias disabled, then normalize to [-1, 1] per channel.
    img_transforms = transforms.Compose(
        [
            transforms.ToTensor(),
            transforms.Resize(
                (224, 224),
                interpolation=transforms.InterpolationMode.BICUBIC,
                antialias=False,
            ),
            transforms.Normalize(
                [0.5, 0.5, 0.5], [0.5, 0.5, 0.5]
            ),
        ]
    )
    # Convert to RGB so Normalize always sees exactly 3 channels
    # (the source file may be grayscale, palettized, or RGBA).
    with Image.open(image_path) as img:
        img_tensor = img_transforms(img.convert("RGB")).to(device).unsqueeze(0)
        out = sd_pipe(img_tensor, guidance_scale=3)
        out["images"][0].save(output_path)
if __name__ == "__main__":
    # Raw string: "\M" and "\F" are invalid escape sequences — Python emits
    # a DeprecationWarning today and will reject them outright in a future
    # version. r"..." makes the Windows path unambiguous. The __main__ guard
    # prevents the (expensive, network-bound) generation from running on
    # a mere import of this module.
    generate_image_caption(r"C:\Master\First.jpg")
|