
# Jina Embeddings V4

## Examples

### Encode functions

```python
import torch
from transformers import AutoModel
from PIL import Image

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load model
model = AutoModel.from_pretrained('jinaai/jina-embeddings-v4', trust_remote_code=True)
model = model.to(device)

# Sample data
texts = ["Here is some sample code", "This is a matching text"]
image_paths = ['/<path_to_image>']
images = [Image.open(path) for path in image_paths]

# Example 1: Text matching task with single vector embeddings
# Truncate embeddings to 256 dimensions and lower max_pixels to cap image resolution
img_embeddings = model.encode_images(images=images, truncate_dim=256, max_pixels=602112, task='text-matching')
text_embeddings = model.encode_texts(texts=texts, truncate_dim=256, max_length=512, task='text-matching')
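
# A hedged sanity check (assumes the encode functions return one tensor per input):
# with truncate_dim=256, each single-vector embedding should have 256 dimensions.
print(text_embeddings[0].shape)  # expected: torch.Size([256])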

# Example 2: Retrieval task with multi-vector embeddings
model.set_task(task='retrieval')

# Generate multi-vector embeddings
img_embeddings = model.encode_images(images=images, vector_type='multi_vector')
text_embeddings = model.encode_texts(texts=texts, vector_type='multi_vector', prompt_name='passage')
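
# A minimal late-interaction (MaxSim) scoring sketch. Assumption: with
# vector_type='multi_vector', each entry is a (num_tokens, dim) tensor of
# per-token embeddings for one input.
sim_matrix = text_embeddings[0] @ img_embeddings[0].T  # token-to-token similarities
score = sim_matrix.max(dim=1).values.sum()             # best image token per text token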

# Example 3: Code task with single vector embeddings
code = ["def hello_world():\n    print('Hello, World!')"]
code_embeddings = model.encode_texts(texts=code, task='code')
```
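
The single-vector embeddings can be compared directly. A minimal sketch, assuming the Example 1 variables `text_embeddings` and `img_embeddings` are `(num_inputs, 256)` tensors (stack them first if a list is returned):

```python
import torch.nn.functional as F

# Score each text against the image by cosine similarity (higher = more similar)
scores = F.cosine_similarity(text_embeddings, img_embeddings, dim=-1)
print(scores)  # one score per text
```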

### Using the model forward

```python
import torch
from transformers import AutoModel, AutoProcessor
from PIL import Image

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load model and processor
model = AutoModel.from_pretrained('jinaai/jina-embeddings-v4', trust_remote_code=True)
model = model.to(device)
processor = AutoProcessor.from_pretrained('jinaai/jina-embeddings-v4', trust_remote_code=True)

# Sample data
texts = ["Here is some sample code", "This is a matching text"]
image_paths = ['/<path_to_image>']

# Process text and images
text_batch = processor.process_texts(texts=texts, prefix="Query", max_length=512)
images = [Image.open(path) for path in image_paths]
image_batch = processor.process_images(images=images)

# Forward pass
model.eval()
with torch.no_grad():
    text_batch = {k: v.to(device) for k, v in text_batch.items()}
    image_batch = {k: v.to(device) for k, v in image_batch.items()}
    
    with torch.autocast(device_type='cuda' if torch.cuda.is_available() else 'cpu'):
        # Get embeddings
        text_embeddings = model.model(**text_batch, task_label='retrieval').single_vec_emb
        img_embeddings = model.model(**image_batch, task_label='retrieval').single_vec_emb
```
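
The forward pass returns raw single-vector embeddings; a common pattern is to L2-normalize them before scoring. A minimal sketch, assuming `single_vec_emb` yields `(batch, dim)` tensors:

```python
import torch.nn.functional as F

# Normalize, then score every text against every image (rows: texts, cols: images)
text_norm = F.normalize(text_embeddings.float(), dim=-1)
img_norm = F.normalize(img_embeddings.float(), dim=-1)
similarity = text_norm @ img_norm.T
best_image = similarity.argmax(dim=1)  # best-matching image index per text
```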

### Inference via the SentenceTransformer library

```python
from sentence_transformers import SentenceTransformer

model = SentenceTransformer(
    'jinaai/jina-embeddings-v4', trust_remote_code=True
)

emb = model.encode(['Khinkali is the best'], task='retrieval', prompt_name='query')
```
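
To complete the retrieval loop, documents can be encoded with the passage prompt and scored against the query. A minimal sketch, assuming sentence-transformers >= 3.0 (which provides `model.similarity`); the document strings are illustrative:

```python
docs = model.encode(
    ['Khinkali are Georgian dumplings', 'The weather is nice today'],
    task='retrieval',
    prompt_name='passage',
)
scores = model.similarity(emb, docs)  # cosine similarity by default
print(scores)  # higher = better match for the query
```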