feat-rename-vector-type-0622 (#21)
- feat: avoid the redundant words in the variables (7f10796af034d90842575c6a877c3ef8b8d0b212)
- feat: use enum for the vector type (e7230645cd96df2626c429031ef6d9761c595ab5)
- Merge branch 'main' into pr/21 (085e2ed8f55f14e4ea5a67596d41bf50026ee9f3)
- refactor: rename vector_type to output_format (96925c43b3978bb6de3d3ab0ebfb27701d625f1a)
- feat: rename the VectorType (669c42abab2468a13298a192ff96826e6d8394f1)
- feat: fix the default values (bb1572174c755b90eb888cb78c496db2c3a8ecf4)
- feat: replace the output_format with a boolean flag (1ffab4f0c4c3d022d3c4e3555fd7bcc362262c1f)
- feat: avoid validating return_multivector (fe4c51b73e21a2ac2f1ff293337a3cac82517e88)
- feat: return a list when the input is a list (f7df96abf5c4741c0e88f6b30b347bb7191f7596)
- modeling_jina_embeddings_v4.py +19 -24
@@ -31,7 +31,6 @@ class PromptType(str, Enum):
 
 
 PREFIX_DICT = {"query": "Query", "passage": "Passage"}
-VECTOR_TYPES = ["single_vector", "multi_vector"]
 
 
 class JinaEmbeddingsV4Processor(Qwen2_5_VLProcessor):
@@ -284,8 +283,9 @@ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
             attention_mask (torch.Tensor): The attention mask tensor.
         Returns:
             JinaEmbeddingsV4ModelOutput:
-
-
+                vlm_last_hidden_states (torch.Tensor, optional): Last hidden states of the VLM.
+                single_vec_emb (torch.Tensor, optional): Single-vector embeddings.
+                multi_vec_emb (torch.Tensor, optional): Multi-vector embeddings.
         """
         # Forward pass through the VLM
         hidden_states = self.get_last_hidden_states(
@@ -320,7 +320,7 @@ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
         task_label: Union[str, List[str]],
         processor_fn: Callable,
         desc: str,
-
+        return_multivector: bool = False,
         return_numpy: bool = False,
         batch_size: int = 32,
         truncate_dim: Optional[int] = None,
@@ -340,7 +340,7 @@ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
                 device_type=torch.device(self.device).type, dtype=torch.bfloat16
             ):
                 embeddings = self(**batch, task_label=task_label)
-                if
+                if not return_multivector:
                     embeddings = embeddings.single_vec_emb
                     if truncate_dim is not None:
                         embeddings = embeddings[:, :truncate_dim]
@@ -357,7 +357,6 @@ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
 
     def _validate_encoding_params(
         self,
-        vector_type: Optional[str] = None,
         truncate_dim: Optional[int] = None,
         prompt_name: Optional[str] = None,
     ) -> Dict[str, Any]:
@@ -374,14 +373,6 @@ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
                 else PREFIX_DICT["query"]
             )
 
-        vector_type = vector_type or "single_vector"
-        if vector_type not in VECTOR_TYPES:
-            raise ValueError(
-                f"Invalid vector_type: {vector_type}. Must be one of {VECTOR_TYPES}."
-            )
-        else:
-            encode_kwargs["vector_type"] = vector_type
-
         truncate_dim = truncate_dim or self.config.truncate_dim
         if truncate_dim is not None and truncate_dim not in self.config.matryoshka_dims:
             raise ValueError(
@@ -413,7 +404,7 @@ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
         task: Optional[str] = None,
        max_length: int = 8192,
         batch_size: int = 8,
-
+        return_multivector: bool = False,
         return_numpy: bool = False,
         truncate_dim: Optional[int] = None,
         prompt_name: Optional[str] = None,
@@ -425,7 +416,7 @@ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
             texts: text or list of text strings to encode
             max_length: Maximum token length for text processing
             batch_size: Number of texts to process at once
-
+            return_multivector: Whether to return multi-vector embeddings instead of single-vector embeddings
             return_numpy: Whether to return numpy arrays instead of torch tensors
             truncate_dim: Dimension to truncate embeddings to (128, 256, 512, or 1024)
             prompt_name: Type of text being encoded ('query' or 'passage')
@@ -434,9 +425,7 @@ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
             List of text embeddings as tensors or numpy arrays when encoding multiple texts, or single text embedding as tensor when encoding a single text
         """
         prompt_name = prompt_name or "query"
-        encode_kwargs = self._validate_encoding_params(
-            vector_type, truncate_dim, prompt_name
-        )
+        encode_kwargs = self._validate_encoding_params(truncate_dim=truncate_dim, prompt_name=prompt_name)
 
         task = self._validate_task(task)
 
@@ -446,6 +435,8 @@ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
             prefix=encode_kwargs.pop("prefix"),
         )
 
+        return_list = isinstance(texts, list)
+
         if isinstance(texts, str):
             texts = [texts]
 
@@ -454,12 +445,13 @@ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
             processor_fn=processor_fn,
             desc="Encoding texts...",
             task_label=task,
+            return_multivector=return_multivector,
             return_numpy=return_numpy,
             batch_size=batch_size,
             **encode_kwargs,
         )
 
-        return embeddings if
+        return embeddings if return_list else embeddings[0]
 
     def _load_images_if_needed(
         self, images: List[Union[str, Image.Image]]
@@ -480,7 +472,7 @@ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
         images: Union[str, Image.Image, List[Union[str, Image.Image]]],
         task: Optional[str] = None,
         batch_size: int = 8,
-
+        return_multivector: bool = False,
         return_numpy: bool = False,
         truncate_dim: Optional[int] = None,
         max_pixels: Optional[int] = None,
@@ -491,7 +483,7 @@ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
         Args:
             images: image(s) to encode, can be PIL Image(s), URL(s), or local file path(s)
             batch_size: Number of images to process at once
-
+            return_multivector: Whether to return multi-vector embeddings instead of single-vector embeddings
             return_numpy: Whether to return numpy arrays instead of torch tensors
             truncate_dim: Dimension to truncate embeddings to (128, 256, 512, or 1024)
             max_pixels: Maximum number of pixels to process per image
@@ -504,9 +496,11 @@ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
             self.processor.image_processor.max_pixels = (
                 max_pixels  # change during encoding
             )
-        encode_kwargs = self._validate_encoding_params(
+        encode_kwargs = self._validate_encoding_params(truncate_dim=truncate_dim)
         task = self._validate_task(task)
 
+        return_list = isinstance(images, list)
+
         # Convert single image to list
         if isinstance(images, (str, Image.Image)):
             images = [images]
@@ -518,6 +512,7 @@ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
             desc="Encoding images...",
             task_label=task,
             batch_size=batch_size,
+            return_multivector=return_multivector,
             return_numpy=return_numpy,
             **encode_kwargs,
         )
@@ -525,7 +520,7 @@ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
         if max_pixels:
             self.processor.image_processor.max_pixels = default_max_pixels
 
-        return embeddings if
+        return embeddings if return_list else embeddings[0]
 
     @classmethod
     def from_pretrained(
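
For orientation, a minimal usage sketch of the encoding API after this change. Only the `prompt_name`, `return_multivector`, and `return_numpy` parameters and the list-in/list-out behaviour are taken from the diff above; the `encode_text` method name, the checkpoint id, the `task="retrieval"` value, and the `AutoModel`/`trust_remote_code` loading path are assumptions for illustration, not shown in the hunks.

import torch
from transformers import AutoModel

# Assumed loading path; the diff only touches the custom from_pretrained hook.
model = AutoModel.from_pretrained(
    "jinaai/jina-embeddings-v4",  # assumed checkpoint id
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,
)

texts = ["A photo of a cat", "A photo of a dog"]

# Default: single-vector embeddings; passing a list returns a list.
single_vecs = model.encode_text(texts, task="retrieval", prompt_name="query")

# The boolean flag replaces the old vector_type / output_format parameter.
multi_vecs = model.encode_text(
    texts, task="retrieval", prompt_name="query", return_multivector=True
)

# A single string returns a single embedding rather than a one-element list.
one_vec = model.encode_text("A photo of a cat", task="retrieval", prompt_name="query")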
|