support sentence-transformers
#4
by
bwang0911
- opened
No description provided.
testing script:
import numpy as np
import numpy.testing as npt
from sentence_transformers import SentenceTransformer
from transformers import AutoModel
# Path under test: load the checkpoint through sentence-transformers, move it
# to the GPU in half precision, and embed a single sentence.
model = SentenceTransformer(
    'bwang0911/jina-v3-test', trust_remote_code=True
).to('cuda').half()
e = model.encode(['Hello world'])
import torch
import torch.nn.functional as F
from transformers import AutoModel, AutoTokenizer
def mean_pooling(model_output, attention_mask):
    """Return the attention-mask-weighted mean of ``model_output[0]``.

    Args:
        model_output: Indexable model result; element 0 is the tensor that is
            pooled (presumably per-token embeddings laid out as
            (batch, seq, dim) -- confirm against the model).
        attention_mask: Mask tensor that is given a trailing axis and expanded
            to the size of ``model_output[0]``; positions holding 0 contribute
            nothing to the mean.

    Returns:
        The masked sum along dimension 1 divided by the mask total along
        dimension 1.
    """
    hidden_states = model_output[0]
    # Broadcast the mask to the embeddings' full size and cast it to float so
    # it can be used as a multiplicative weight.
    mask = attention_mask.unsqueeze(-1).expand(hidden_states.size()).float()
    masked_sum = torch.sum(hidden_states * mask, 1)
    # The lower bound keeps the division defined when a row's mask is all zero.
    mask_total = torch.clamp(mask.sum(1), min=1e-9)
    return masked_sum / mask_total
# Reference path: run the same checkpoint through plain transformers and pool
# the output by hand with mean_pooling.
tokenizer = AutoTokenizer.from_pretrained('bwang0911/jina-v3-test')
m2 = AutoModel.from_pretrained('bwang0911/jina-v3-test', trust_remote_code=True)
m2 = m2.to('cuda').half()

encoded_input = tokenizer(
    'Hello world', padding=True, truncation=True, return_tensors='pt'
).to('cuda')

# Inference only, so gradient tracking is switched off for the forward pass.
with torch.no_grad():
    model_output = m2(**encoded_input)

# Pool, then bring the result back to the host as a NumPy array.
e2 = (
    mean_pooling(model_output, encoded_input['attention_mask'])
    .detach()
    .cpu()
    .numpy()
)

# Compare the sentence-transformers embedding `e` with the manual one to
# 5 decimal places.
npt.assert_almost_equal(e, e2, decimal=5)
Max absolute difference: 0.00097656
Max relative difference: 0.00048379
x: array([[ 1.865 , -1.623 , 2.912 , ..., 0.388 , -0.1466 , -0.02988]],
dtype=float16)
y: array([[ 1.86548, -1.62329, 2.91211, ..., 0.388 , -0.14662, -0.02988]],
dtype=float32)
LGTM! I'd recommend comparing the embeddings with the HF encode() function too, and also testing a setup with num_examples > 1.
Will follow up with somewhat more intensive testing on more adapters, and remove the unneeded imports.
bwang0911
changed pull request status to
merged