Update README.md
README.md
CHANGED
@@ -37,7 +37,7 @@ The model uses bf16 tensors and allocates ~4.4GB of VRAM when loaded. You can ea
 | 16 | 11.5 |
 | 32 | 19.7 |
 
-
+You can generate embeddings with this model in many different ways:
 
 <details open>
 <summary>
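For quick reference, the two table rows visible in this hunk (batch 16 → 11.5 GB, batch 32 → 19.7 GB) imply roughly 0.5 GB of VRAM per image. A back-of-the-envelope sketch, not from the README:

```python
# Rough VRAM estimate fitted to the two data points in this hunk
# (batch 16 -> 11.5 GB, batch 32 -> 19.7 GB). The fitted intercept
# (~3.3 GB) differs a bit from the ~4.4 GB quoted for the bare model,
# so treat this as a rough guide only.
def estimated_vram_gb(batch_size: int) -> float:
    per_sample = (19.7 - 11.5) / (32 - 16)  # ~0.51 GB per image
    base = 11.5 - 16 * per_sample           # ~3.3 GB fixed cost
    return base + per_sample * batch_size

print(f"batch 8: ~{estimated_vram_gb(8):.1f} GB")
```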
@@ -52,12 +52,13 @@ pip install -U llama-index-embeddings-huggingface
 from llama_index.embeddings.huggingface import HuggingFaceEmbedding
 
 model = HuggingFaceEmbedding(
-
-    device="mps",
+    model_name="llamaindex/vdr-2b-v1",
+    device="cpu",  # "mps" for mac, "cuda" for nvidia GPUs
     trust_remote_code=True,
 )
 
-
+image_embedding = model.get_image_embedding("image.png")
+query_embedding = model.get_query_embedding("some query")
 ```
 
 </details>
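The two calls added at new lines 60-61 return plain embedding vectors. Here is a minimal sketch (not part of the diff) of how they might be compared, assuming both come back as flat lists of floats:

```python
# Score the query against the image with cosine similarity.
import numpy as np

image_vec = np.asarray(image_embedding)
query_vec = np.asarray(query_embedding)

score = np.dot(query_vec, image_vec) / (
    np.linalg.norm(query_vec) * np.linalg.norm(image_vec)
)
print(f"query-image similarity: {score:.4f}")
```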
@@ -80,7 +81,7 @@ min_pixels = 1 * 28 * 28
 
 # Load the embedding model and processor
 model = Qwen2VLForConditionalGeneration.from_pretrained(
-    'llamaindex/vdr-2b-
+    'llamaindex/vdr-2b-v1',
     # These are the recommended kwargs for the model, but change them as needed
     attn_implementation="flash_attention_2",
     torch_dtype=torch.bfloat16,
@@ -88,7 +89,7 @@ model = Qwen2VLForConditionalGeneration.from_pretrained(
 ).eval()
 
 processor = AutoProcessor.from_pretrained(
-    'llamaindex/vdr-2b-
+    'llamaindex/vdr-2b-v1',
     min_pixels=min_pixels,
     max_pixels=max_pixels
 )
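Stitched together, the loading code touched by these two hunks looks roughly like this. A runnable sketch; the `max_pixels` value is an assumption, since it sits outside the diff:

```python
import torch
from transformers import AutoProcessor, Qwen2VLForConditionalGeneration

min_pixels = 1 * 28 * 28
max_pixels = 768 * 28 * 28  # assumption: value not visible in this diff

# Load the embedding model and processor
model = Qwen2VLForConditionalGeneration.from_pretrained(
    'llamaindex/vdr-2b-v1',
    # Recommended kwargs per the README; flash_attention_2 needs the
    # flash-attn package and a supported GPU
    attn_implementation="flash_attention_2",
    torch_dtype=torch.bfloat16,
).eval()

processor = AutoProcessor.from_pretrained(
    'llamaindex/vdr-2b-v1',
    min_pixels=min_pixels,
    max_pixels=max_pixels,
)
```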
@@ -216,10 +217,10 @@ via SentenceTransformers
 from sentence_transformers import SentenceTransformer
 
 model = SentenceTransformer(
-    model_name_or_path="llamaindex/vdr-2b-
-    device="
+    model_name_or_path="llamaindex/vdr-2b-v1",
+    device="cuda",
     trust_remote_code=True,
-    # These are the recommended kwargs for the model, but change them as needed
+    # These are the recommended kwargs for the model, but change them as needed if you don't have CUDA
     model_kwargs={
         "torch_dtype": torch.bfloat16,
         "device_map": "cuda:0",
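The hunk cuts off inside `model_kwargs`. A sketch of how the completed snippet might look and be used, with the closing of the dict and the encode/similarity calls assumed rather than taken from the README:

```python
import torch
from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer(
    model_name_or_path="llamaindex/vdr-2b-v1",
    device="cuda",
    trust_remote_code=True,
    # These are the recommended kwargs for the model, but change them as needed if you don't have CUDA
    model_kwargs={
        "torch_dtype": torch.bfloat16,
        "device_map": "cuda:0",
    },  # assumption: dict closed here; the diff is truncated above
)

# Standard SentenceTransformers calls; how this model handles image inputs
# depends on its remote code, so only text is shown here.
query_emb = model.encode("some query")
doc_emb = model.encode("some document text")
print(util.cos_sim(query_emb, doc_emb))
```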