# testingmodel / main.py
import os

import torch
from PIL import Image
from torchvision import transforms
from transformers import BlipProcessor, BlipForConditionalGeneration
from diffusers import StableDiffusionImageVariationPipeline


def generate_image_caption(image_path):
    """Generate a Stable Diffusion image variation of image_path and save it as img1.jpg."""
    # Run on CPU; CUDA_LAUNCH_BLOCKING only has an effect when a CUDA device is used.
    device = torch.device("cpu")
    os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

    # Load the image-variation pipeline and move it to the target device.
    sd_pipe = StableDiffusionImageVariationPipeline.from_pretrained(
        "lambdalabs/sd-image-variations-diffusers", revision="v2.0"
    )
    sd_pipe = sd_pipe.to(device)

    # Preprocessing: tensor conversion, bicubic resize to the 224x224 encoder input
    # size, and normalization to [-1, 1].
    img_transforms = transforms.Compose(
        [
            transforms.ToTensor(),
            transforms.Resize(
                (224, 224),
                interpolation=transforms.InterpolationMode.BICUBIC,
                antialias=False,
            ),
            transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
        ]
    )

    # Image-to-image: generate one variation of the input image and save the result.
    with Image.open(image_path) as img:
        img_tensor = img_transforms(img.convert("RGB")).to(device).unsqueeze(0)
        out = sd_pipe(img_tensor, guidance_scale=3)
        out.images[0].save("img1.jpg")


if __name__ == "__main__":
    generate_image_caption(r"C:\Master\First.jpg")