# Jina Embeddings V4

## Examples

Encode functions:

```python
import torch
from transformers import AutoModel
from PIL import Image

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load model
model = AutoModel.from_pretrained('jinaai/jina-embeddings-v4', trust_remote_code=True)
model = model.to(device)

# Sample data
texts = ["Here is some sample code", "This is a matching text"]
image_paths = ['/<path_to_image>']
images = [Image.open(path) for path in image_paths]

# Example 1: Text matching task with single-vector embeddings
# Truncate embeddings to 256 dimensions; lower max_pixels to reduce image memory use
img_embeddings = model.encode_images(images=images, truncate_dim=256, max_pixels=602112, task='text-matching')
text_embeddings = model.encode_texts(texts=texts, truncate_dim=256, max_length=512, task='text-matching')

# Example 2: Retrieval task with multi-vector embeddings
model.set_task(task='retrieval')

# Generate multi-vector embeddings
img_embeddings = model.encode_images(images=images, vector_type='multi_vector')
text_embeddings = model.encode_texts(texts=texts, vector_type='multi_vector', prompt_name='passage')

# Example 3: Code task with single-vector embeddings
code = ["def hello_world():\n    print('Hello, World!')"]
code_embeddings = model.encode_texts(texts=code, task='code')
```

Calling the model's forward pass directly:

```python
import torch
from transformers import AutoModel, AutoProcessor
from PIL import Image

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load model and processor
model = AutoModel.from_pretrained('jinaai/jina-embeddings-v4', trust_remote_code=True)
model = model.to(device)
processor = AutoProcessor.from_pretrained('jinaai/jina-embeddings-v4', trust_remote_code=True)

# Sample data
texts = ["Here is some sample code", "This is a matching text"]
image_paths = ['/<path_to_image>']

# Process text and images
text_batch = processor.process_texts(texts=texts, prefix="Query", max_length=512)
images = [Image.open(path) for path in image_paths]
image_batch = processor.process_images(images=images)

# Forward pass
model.eval()
with torch.no_grad():
    text_batch = {k: v.to(device) for k, v in text_batch.items()}
    image_batch = {k: v.to(device) for k, v in image_batch.items()}
    with torch.autocast(device_type='cuda' if torch.cuda.is_available() else 'cpu'):
        # Get single-vector embeddings from the model outputs
        text_embeddings = model.model(**text_batch, task_label='retrieval').single_vec_emb
        img_embeddings = model.model(**image_batch, task_label='retrieval').single_vec_emb
```

Inference via the `SentenceTransformer` library:

```python
from sentence_transformers import SentenceTransformer

model = SentenceTransformer(
    'jinaai/jina-embeddings-v4',
    trust_remote_code=True,
)

emb = model.encode(['Khinkali is the best'], task='retrieval', prompt_name='query')
```
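As a follow-up to the `SentenceTransformer` path, retrieval scores can be computed with the library's built-in similarity helper. A minimal sketch, assuming sentence-transformers >= 3.0 (where `model.similarity` is available); the sample query and passage are illustrative:

```python
from sentence_transformers import SentenceTransformer

model = SentenceTransformer('jinaai/jina-embeddings-v4', trust_remote_code=True)

query_emb = model.encode(['What is the best Georgian dumpling?'], task='retrieval', prompt_name='query')
passage_emb = model.encode(['Khinkali is the best'], task='retrieval', prompt_name='passage')

# Cosine-similarity matrix of shape (num_queries, num_passages)
print(model.similarity(query_emb, passage_emb))
```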
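For the raw single-vector embeddings produced by the encode functions above, scoring reduces to cosine similarity. A minimal sketch, assuming `encode_texts`/`encode_images` return 2-D `torch.Tensor` batches of shape `(batch, dim)`; `cosine_scores` is an illustrative helper, and re-normalizing is a no-op if the model already returns unit-length vectors:

```python
import torch
import torch.nn.functional as F

def cosine_scores(text_embeddings: torch.Tensor, img_embeddings: torch.Tensor) -> torch.Tensor:
    """Cosine-similarity matrix; entry (i, j) scores text i against image j.

    Both inputs have shape (batch, dim). Normalizing is harmless if the
    embeddings are already unit-length.
    """
    text_emb = F.normalize(text_embeddings, p=2, dim=-1)
    img_emb = F.normalize(img_embeddings, p=2, dim=-1)
    return text_emb @ img_emb.T

# e.g. cosine_scores(text_embeddings, img_embeddings) with Example 1's outputs
```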
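Multi-vector embeddings (Example 2) are intended for late-interaction scoring rather than a single dot product. A hedged sketch of ColBERT-style MaxSim, assuming each encoded item comes back as a `(num_tokens, dim)` tensor of token-level vectors; `late_interaction_score` is an illustrative helper, not part of the model's API:

```python
import torch

def late_interaction_score(query_vecs: torch.Tensor, doc_vecs: torch.Tensor) -> torch.Tensor:
    """ColBERT-style MaxSim: match each query token to its best document
    token, then sum over query tokens.

    query_vecs: (num_query_tokens, dim); doc_vecs: (num_doc_tokens, dim).
    """
    sim = query_vecs @ doc_vecs.T        # (num_query_tokens, num_doc_tokens)
    return sim.max(dim=-1).values.sum()  # best doc token per query token, summed

# e.g. late_interaction_score(text_embeddings[0], img_embeddings[0])
# with Example 2's multi-vector outputs (shapes assumed)
```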