ziyjiang committed
Commit 0748cfb · verified · 1 Parent(s): f759c6b

Update README.md

Files changed (1): README.md +0 -68
README.md CHANGED
@@ -33,75 +33,7 @@ VLM2Vec-LlaVa-Next could outperform the baselines and other versions of VLM2Vec b
 
  ## How to use VLM2Vec-LlaVa-Next
 
- First, clone our GitHub repository:
- ```bash
- git clone https://github.com/TIGER-AI-Lab/VLM2Vec.git
- ```
-
- Then enter the directory and run the following code.
- ```python
- from src.model import MMEBModel
- from src.arguments import ModelArguments
- from src.utils import load_processor
-
- import torch
- from PIL import Image
-
- model_args = ModelArguments(
-     model_name='TIGER-Lab/VLM2Vec-LLaVa-Next',
-     pooling='last',
-     normalize=True,
-     model_backbone='llava')
-
- model = MMEBModel.load(model_args)
- model.eval()
- model = model.to('cuda', dtype=torch.bfloat16)
-
- processor = load_processor(model_args)
-
- # Image + Text -> Text
- inputs = processor('<image> Represent the given image with the following question: What is in the image', [Image.open('figures/example.jpg')])
- inputs = {key: value.to('cuda') for key, value in inputs.items()}
- qry_output = model(qry=inputs)["qry_reps"]
-
- string = 'A cat and a dog'
- inputs = processor(string)
- inputs = {key: value.to('cuda') for key, value in inputs.items()}
- tgt_output = model(tgt=inputs)["tgt_reps"]
- print(string, '=', model.compute_similarity(qry_output, tgt_output))
- ## A cat and a dog = tensor([[0.2969]], device='cuda:0', dtype=torch.bfloat16)
-
- string = 'A cat and a tiger'
- inputs = processor(string)
- inputs = {key: value.to('cuda') for key, value in inputs.items()}
- tgt_output = model(tgt=inputs)["tgt_reps"]
- print(string, '=', model.compute_similarity(qry_output, tgt_output))
- ## A cat and a tiger = tensor([[0.2080]], device='cuda:0', dtype=torch.bfloat16)
-
- # Text -> Image
- inputs = processor('Find me an everyday image that matches the given caption: A cat and a dog.')
- inputs = {key: value.to('cuda') for key, value in inputs.items()}
- qry_output = model(qry=inputs)["qry_reps"]
-
- string = '<image> Represent the given image.'
- inputs = processor(string, [Image.open('figures/example.jpg')])
- inputs = {key: value.to('cuda') for key, value in inputs.items()}
- tgt_output = model(tgt=inputs)["tgt_reps"]
- print(string, '=', model.compute_similarity(qry_output, tgt_output))
- ## <image> Represent the given image. = tensor([[0.3105]], device='cuda:0', dtype=torch.bfloat16)
-
- inputs = processor('Find me an everyday image that matches the given caption: A cat and a tiger.')
- inputs = {key: value.to('cuda') for key, value in inputs.items()}
- qry_output = model(qry=inputs)["qry_reps"]
 
- string = '<image> Represent the given image.'
- inputs = processor(string, [Image.open('figures/example.jpg')])
- inputs = {key: value.to('cuda') for key, value in inputs.items()}
- tgt_output = model(tgt=inputs)["tgt_reps"]
- print(string, '=', model.compute_similarity(qry_output, tgt_output))
- ## <image> Represent the given image. = tensor([[0.2158]], device='cuda:0', dtype=torch.bfloat16)
  ```
 
  ## Citation
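
For readers who land on this commit, the removed quickstart reduces to one query/target round trip: embed a query with `model(qry=...)["qry_reps"]`, embed a target with `model(tgt=...)["tgt_reps"]`, and score the pair with `model.compute_similarity`. Below is a minimal sketch, assuming only the API shown in the removed snippet, that ranks several candidate captions against one image query; the helper name `rank_captions` and the image path are illustrative, not part of the repository:

```python
from src.model import MMEBModel
from src.arguments import ModelArguments
from src.utils import load_processor

import torch
from PIL import Image

# Same setup as the removed quickstart.
model_args = ModelArguments(
    model_name='TIGER-Lab/VLM2Vec-LLaVa-Next',
    pooling='last',
    normalize=True,
    model_backbone='llava')
model = MMEBModel.load(model_args)
model.eval()
model = model.to('cuda', dtype=torch.bfloat16)
processor = load_processor(model_args)

def rank_captions(image_path, captions):
    # Hypothetical helper: score each candidate caption against one image query.
    inputs = processor(
        '<image> Represent the given image with the following question: What is in the image',
        [Image.open(image_path)])
    inputs = {key: value.to('cuda') for key, value in inputs.items()}
    with torch.no_grad():  # inference only, no gradients needed
        qry_reps = model(qry=inputs)["qry_reps"]
        scores = []
        for caption in captions:
            tgt_inputs = processor(caption)
            tgt_inputs = {key: value.to('cuda') for key, value in tgt_inputs.items()}
            tgt_reps = model(tgt=tgt_inputs)["tgt_reps"]
            # compute_similarity returns a 1x1 tensor here; .item() unwraps it.
            scores.append((caption, model.compute_similarity(qry_reps, tgt_reps).item()))
    # With normalize=True the scores are comparable across candidates,
    # so a plain sort yields the ranking (best match first).
    return sorted(scores, key=lambda pair: pair[1], reverse=True)

print(rank_captions('figures/example.jpg', ['A cat and a dog', 'A cat and a tiger']))
```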
 
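The same calls run in the other direction (text query, image targets). Continuing from the sketch above and reusing its `model` and `processor`, here is a hedged sketch with the illustrative helper `best_image_for_caption`; the path list is a placeholder:

```python
def best_image_for_caption(caption, image_paths):
    # Hypothetical helper: pick the image whose embedding best matches a caption query.
    qry_inputs = processor(f'Find me an everyday image that matches the given caption: {caption}')
    qry_inputs = {key: value.to('cuda') for key, value in qry_inputs.items()}
    best_path, best_score = None, float('-inf')
    with torch.no_grad():
        qry_reps = model(qry=qry_inputs)["qry_reps"]
        for path in image_paths:
            tgt_inputs = processor('<image> Represent the given image.', [Image.open(path)])
            tgt_inputs = {key: value.to('cuda') for key, value in tgt_inputs.items()}
            tgt_reps = model(tgt=tgt_inputs)["tgt_reps"]
            score = model.compute_similarity(qry_reps, tgt_reps).item()
            if score > best_score:
                best_path, best_score = path, score
    return best_path, best_score

print(best_image_for_caption('A cat and a dog', ['figures/example.jpg']))
```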