jinaai
/

jina-embeddings-v4

@@ -30,11 +30,6 @@ class PromptType(str, Enum):
     passage = "passage"
-class VectorOutputFormat(str, Enum):
-    SINGLE = "single"
-    MULTIPLE = "multiple"
 PREFIX_DICT = {"query": "Query", "passage": "Passage"}
@@ -325,7 +320,7 @@ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
         task_label: Union[str, List[str]],
         processor_fn: Callable,
         desc: str,
-        output_format: Union[str, VectorOutputFormat] = VectorOutputFormat.SINGLE,
         return_numpy: bool = False,
         batch_size: int = 32,
         truncate_dim: Optional[int] = None,
@@ -345,8 +340,7 @@ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
                     device_type=torch.device(self.device).type, dtype=torch.bfloat16
                 ):
                     embeddings = self(**batch, task_label=task_label)
-                    output_format_str = output_format.value if isinstance(output_format, VectorOutputFormat) else output_format
-                    if output_format_str == VectorOutputFormat.SINGLE.value:
                         embeddings = embeddings.single_vec_emb
                         if truncate_dim is not None:
                             embeddings = embeddings[:, :truncate_dim]
@@ -363,7 +357,7 @@ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
     def _validate_encoding_params(
         self,
-        output_format: Optional[Union[str, VectorOutputFormat]] = None,
         truncate_dim: Optional[int] = None,
         prompt_name: Optional[str] = None,
     ) -> Dict[str, Any]:
@@ -380,17 +374,8 @@ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
                     else PREFIX_DICT["query"]
                 )
-        output_format = output_format or VectorOutputFormat.SINGLE
-        if isinstance(output_format, VectorOutputFormat):
-            encode_kwargs["output_format"] = output_format.value
-        else:
-            try:
-                output_format_enum = VectorOutputFormat(output_format)
-                encode_kwargs["output_format"] = output_format_enum.value
-            except ValueError:
-                raise ValueError(
-                    f"Invalid output_format: {output_format}. Must be one of {[v.value for v in VectorOutputFormat]}."
-                )
         truncate_dim = truncate_dim or self.config.truncate_dim
         if truncate_dim is not None and truncate_dim not in self.config.matryoshka_dims:
@@ -423,7 +408,7 @@ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
         task: Optional[str] = None,
         max_length: int = 8192,
         batch_size: int = 8,
-        output_format: Optional[Union[str, VectorOutputFormat]] = VectorOutputFormat.SINGLE,
         return_numpy: bool = False,
         truncate_dim: Optional[int] = None,
         prompt_name: Optional[str] = None,
@@ -435,7 +420,7 @@ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
             texts: text or list of text strings to encode
             max_length: Maximum token length for text processing
             batch_size: Number of texts to process at once
-            output_format: Type of embedding vector to generate (VectorOutputFormat.SINGLE or VectorOutputFormat.MULTIPLE)
             return_numpy: Whether to return numpy arrays instead of torch tensors
             truncate_dim: Dimension to truncate embeddings to (128, 256, 512, or 1024)
             prompt_name: Type of text being encoded ('query' or 'passage')
@@ -445,7 +430,7 @@ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
         """
         prompt_name = prompt_name or "query"
         encode_kwargs = self._validate_encoding_params(
-            output_format, truncate_dim, prompt_name
         )
         task = self._validate_task(task)
@@ -490,7 +475,7 @@ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
         images: Union[str, Image.Image, List[Union[str, Image.Image]]],
         task: Optional[str] = None,
         batch_size: int = 8,
-        output_format: Optional[Union[str, VectorOutputFormat]] = VectorOutputFormat.SINGLE,
         return_numpy: bool = False,
         truncate_dim: Optional[int] = None,
         max_pixels: Optional[int] = None,
@@ -501,7 +486,7 @@ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
         Args:
             images: image(s) to encode, can be PIL Image(s), URL(s), or local file path(s)
             batch_size: Number of images to process at once
-            output_format: Type of embedding vector to generate (VectorOutputFormat.SINGLE or VectorOutputFormat.MULTIPLE)
             return_numpy: Whether to return numpy arrays instead of torch tensors
             truncate_dim: Dimension to truncate embeddings to (128, 256, 512, or 1024)
             max_pixels: Maximum number of pixels to process per image
@@ -514,7 +499,7 @@ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
             self.processor.image_processor.max_pixels = (
                 max_pixels  # change during encoding
             )
-        encode_kwargs = self._validate_encoding_params(output_format, truncate_dim)
         task = self._validate_task(task)
         # Convert single image to list

     passage = "passage"
 PREFIX_DICT = {"query": "Query", "passage": "Passage"}
         task_label: Union[str, List[str]],
         processor_fn: Callable,
         desc: str,
+        return_multivector: bool = False,
         return_numpy: bool = False,
         batch_size: int = 32,
         truncate_dim: Optional[int] = None,
                     device_type=torch.device(self.device).type, dtype=torch.bfloat16
                 ):
                     embeddings = self(**batch, task_label=task_label)
+                    if not return_multivector:
                         embeddings = embeddings.single_vec_emb
                         if truncate_dim is not None:
                             embeddings = embeddings[:, :truncate_dim]
     def _validate_encoding_params(
         self,
+        return_multivector: Optional[bool] = None,
         truncate_dim: Optional[int] = None,
         prompt_name: Optional[str] = None,
     ) -> Dict[str, Any]:
                     else PREFIX_DICT["query"]
                 )
+        return_multivector = return_multivector or False
+        encode_kwargs["return_multivector"] = return_multivector
         truncate_dim = truncate_dim or self.config.truncate_dim
         if truncate_dim is not None and truncate_dim not in self.config.matryoshka_dims:
         task: Optional[str] = None,
         max_length: int = 8192,
         batch_size: int = 8,
+        return_multivector: bool = False,
         return_numpy: bool = False,
         truncate_dim: Optional[int] = None,
         prompt_name: Optional[str] = None,
             texts: text or list of text strings to encode
             max_length: Maximum token length for text processing
             batch_size: Number of texts to process at once
+            return_multivector: Whether to return multi-vector embeddings instead of single-vector embeddings
             return_numpy: Whether to return numpy arrays instead of torch tensors
             truncate_dim: Dimension to truncate embeddings to (128, 256, 512, or 1024)
             prompt_name: Type of text being encoded ('query' or 'passage')
         """
         prompt_name = prompt_name or "query"
         encode_kwargs = self._validate_encoding_params(
+            return_multivector=return_multivector, truncate_dim=truncate_dim, prompt_name=prompt_name
         )
         task = self._validate_task(task)
         images: Union[str, Image.Image, List[Union[str, Image.Image]]],
         task: Optional[str] = None,
         batch_size: int = 8,
+        return_multivector: bool = False,
         return_numpy: bool = False,
         truncate_dim: Optional[int] = None,
         max_pixels: Optional[int] = None,
         Args:
             images: image(s) to encode, can be PIL Image(s), URL(s), or local file path(s)
             batch_size: Number of images to process at once
+            return_multivector: Whether to return multi-vector embeddings instead of single-vector embeddings
             return_numpy: Whether to return numpy arrays instead of torch tensors
             truncate_dim: Dimension to truncate embeddings to (128, 256, 512, or 1024)
             max_pixels: Maximum number of pixels to process per image
             self.processor.image_processor.max_pixels = (
                 max_pixels  # change during encoding
             )
+        encode_kwargs = self._validate_encoding_params(return_multivector=return_multivector, truncate_dim=truncate_dim)
         task = self._validate_task(task)
         # Convert single image to list