support sentence-transformers

#4
Jina AI org
No description provided.
Jina AI org

Testing script:

import numpy.testing as npt
import torch
from sentence_transformers import SentenceTransformer
from transformers import AutoModel, AutoTokenizer

# Path 1: encode through sentence-transformers, in fp16 on the GPU.
model = SentenceTransformer('bwang0911/jina-v3-test', trust_remote_code=True)
model = model.to('cuda').half()
e = model.encode(['Hello world'])


# Path 2: reproduce the embedding with plain transformers and manual mean pooling.
def mean_pooling(model_output, attention_mask):
    # Average the token embeddings, masking out padding positions.
    token_embeddings = model_output[0]
    input_mask_expanded = (
        attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
    )
    return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(
        input_mask_expanded.sum(1), min=1e-9
    )
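# Worked example (illustrative): with attention_mask [[1, 1, 0]], only the two
# real token vectors are summed and the sum is divided by 2, so padding
# positions never dilute the mean.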


tokenizer = AutoTokenizer.from_pretrained('bwang0911/jina-v3-test')
m2 = AutoModel.from_pretrained('bwang0911/jina-v3-test', trust_remote_code=True)
m2 = m2.to('cuda').half()
# Tokenize and move the inputs to the GPU.
encoded_input = tokenizer(
    'Hello world', padding=True, truncation=True, return_tensors='pt'
).to('cuda')

with torch.no_grad():
    model_output = m2(**encoded_input)
    e2 = mean_pooling(model_output, encoded_input['attention_mask'])


# Move to CPU and convert to numpy for comparison (detach is unnecessary under no_grad).
e2 = e2.cpu().numpy()


# Compare the two paths to 5 decimal places.
npt.assert_almost_equal(e, e2, decimal=5)
Jina AI org
The assertion at decimal=5 fails with:

Max absolute difference: 0.00097656
Max relative difference: 0.00048379
 x: array([[ 1.865  , -1.623  ,  2.912  , ...,  0.388  , -0.1466 , -0.02988]],
      dtype=float16)
 y: array([[ 1.86548, -1.62329,  2.91211, ...,  0.388  , -0.14662, -0.02988]],
      dtype=float32)
Jina AI org

LGTM! I'd recommend comparing embeddings against the HF encode() function too, and also in a num_examples > 1 setup.
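A minimal sketch of that follow-up, reusing model, m2, and npt from the script above; that the remote-code transformers model exposes an encode() method is an assumption here (the released jina models do, but this test repo may differ):

import numpy as np

# Batch with num_examples > 1; m2.encode is assumed to exist via remote code.
texts = ['Hello world', 'How is the weather today?']
e_st = model.encode(texts)
e_hf = m2.encode(texts)
npt.assert_almost_equal(np.asarray(e_st), np.asarray(e_hf), decimal=3)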

Jina AI org

Will follow up with a bit more intensive testing on more adapters, and remove the unneeded imports.
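A sketch of what that adapter testing could look like; the task/prompt_name keywords and the adapter names mirror the public jina-embeddings-v3 API and are assumptions for this test repo:

# Hypothetical adapter sweep: task names are borrowed from jina-embeddings-v3's
# LoRA adapters; whether this checkpoint accepts them is an assumption.
for task in ['retrieval.query', 'retrieval.passage', 'text-matching']:
    e_task = model.encode(['Hello world'], task=task, prompt_name=task)
    print(task, e_task.shape)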

bwang0911 changed pull request status to merged
