Transformers
resnet50
Inference Endpoints
File size: 1,483 Bytes
b94a16e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import os
import requests
from PIL import Image
import torch
from torchvision import transforms
from transformers import (
    VisionEncoderDecoderModel,
    ViTImageProcessor,
    AutoTokenizer,
    BlipProcessor,
    BlipForConditionalGeneration,
)
from diffusers import (
    DiffusionPipeline,
    StableDiffusionPipeline,
    StableDiffusionImageVariationPipeline,
)

def generate_image_caption(image_path, output_path="img1.jpg", guidance_scale=3):
    """Generate a variation of an image and save it to disk.

    Despite its name, this function does not produce a text caption: it
    feeds the image at ``image_path`` through the
    ``lambdalabs/sd-image-variations-diffusers`` (revision ``v2.0``)
    image-variation pipeline on the CPU and writes the first generated
    image to ``output_path``.

    Args:
        image_path: Path of the input image, opened with ``PIL.Image.open``.
        output_path: Where the generated image is saved. Defaults to
            ``"img1.jpg"`` in the current working directory.
        guidance_scale: Guidance scale forwarded to the pipeline call.

    Returns:
        None. The result is written to ``output_path``.
    """
    device = torch.device("cpu")
    # Kept from the original script. This flag only concerns CUDA kernel
    # launches, so it should have no effect while running on the CPU.
    os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

    # Only one pipeline is built; a second, unused copy would load the
    # model weights again for no benefit.
    sd_pipe = StableDiffusionImageVariationPipeline.from_pretrained(
        "lambdalabs/sd-image-variations-diffusers", revision="v2.0"
    )
    sd_pipe = sd_pipe.to(device)

    # Preprocessing: tensor conversion, 224x224 bicubic resize without
    # antialiasing, then per-channel normalization.
    # NOTE(review): the model card example for this checkpoint appears to
    # normalize with the CLIP mean/std rather than 0.5 -- confirm which
    # values are intended before changing them.
    img_transforms = transforms.Compose(
        [
            transforms.ToTensor(),
            transforms.Resize(
                (224, 224),
                interpolation=transforms.InterpolationMode.BICUBIC,
                antialias=False,
            ),
            transforms.Normalize(
                [0.5, 0.5, 0.5], [0.5, 0.5, 0.5]
            ),
        ]
    )

    with Image.open(image_path) as img:
        # Force three channels so RGBA / greyscale / palette inputs do not
        # break the 3-channel normalization above.
        rgb_img = img.convert("RGB")
        # Add a leading batch dimension of size 1.
        img_tensor = img_transforms(rgb_img).to(device).unsqueeze(0)

    out = sd_pipe(img_tensor, guidance_scale=guidance_scale)
    out["images"][0].save(output_path)

    



# Raw string: the backslashes in this Windows path must not be parsed as
# escape sequences ("\M" and "\F" are invalid escapes in a normal literal).
generate_image_caption(r"C:\Master\First.jpg")