|
import os |
|
import requests |
|
from PIL import Image |
|
import torch |
|
from torchvision import transforms |
|
from transformers import ( |
|
VisionEncoderDecoderModel, |
|
ViTImageProcessor, |
|
AutoTokenizer, |
|
BlipProcessor, |
|
BlipForConditionalGeneration, |
|
) |
|
from diffusers import ( |
|
DiffusionPipeline, |
|
StableDiffusionPipeline, |
|
StableDiffusionImageVariationPipeline, |
|
) |
|
|
|
def generate_image_caption(image_path, output_path="img1.jpg"):
    """Generate a Stable Diffusion *image variation* of the input image.

    NOTE(review): despite the name, this function does not produce a text
    caption — it runs the lambdalabs image-variation pipeline and saves the
    generated image to *output_path*. The name is kept for compatibility
    with existing callers.

    Parameters
    ----------
    image_path : str
        Path to the source image file.
    output_path : str, optional
        Where the generated variation is written (default: "img1.jpg",
        matching the previous hard-coded filename).
    """
    # Inference is pinned to CPU; the previous CUDA_LAUNCH_BLOCKING env-var
    # setting had no effect on a CPU-only run and was removed. The unused
    # second DiffusionPipeline load (an expensive duplicate model download)
    # was removed as well.
    device = torch.device("cpu")

    sd_pipe = StableDiffusionImageVariationPipeline.from_pretrained(
        "lambdalabs/sd-image-variations-diffusers", revision="v2.0"
    )
    sd_pipe = sd_pipe.to(device)

    # Preprocessing per the lambdalabs model card: tensor-ize, resize to the
    # 224x224 CLIP input with bicubic interpolation and antialias disabled
    # (the model was trained without it), then normalize to [-1, 1].
    img_transforms = transforms.Compose(
        [
            transforms.ToTensor(),
            transforms.Resize(
                (224, 224),
                interpolation=transforms.InterpolationMode.BICUBIC,
                antialias=False,
            ),
            transforms.Normalize(
                [0.5, 0.5, 0.5], [0.5, 0.5, 0.5]
            ),
        ]
    )

    with Image.open(image_path) as img:
        # Force 3 channels: an RGBA or grayscale input would otherwise
        # produce a tensor incompatible with the 3-channel Normalize above.
        img_tensor = img_transforms(img.convert("RGB")).to(device).unsqueeze(0)
        out = sd_pipe(img_tensor, guidance_scale=3)
        out["images"][0].save(output_path)
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Raw string: "\M" and "\F" are invalid escape sequences and raise
    # SyntaxWarning on modern Python; r"..." keeps the Windows path literal.
    generate_image_caption(r"C:\Master\First.jpg")
|
|
|
|
|
|