In [None]:
import os
from typing import Dict, List

# The presence of GOOGLE_CLOUD_PROJECT in the environment is used as the Colab marker.
IS_COLAB = "GOOGLE_CLOUD_PROJECT" in os.environ
if IS_COLAB:
    # This assignment has to happen before the remaining imports run.
    # Pointing HF_HOME at /content/cache/ avoids running out of disk space.
    os.environ["HF_HOME"] = "/content/cache/"

import mteb
import numpy as np
import torch
from mteb.encoder_interface import PromptType
from sentence_transformers import SentenceTransformer

### Notebook Configuration

In [None]:
# Selectable checkpoints, keyed by a short alias. Every entry carries:
#   name     - model identifier passed to SentenceTransformer
#   revision - pinned revision of that model
#   teacher  - identifier of the teacher model used in asymmetric mode
MODELS = {
    "ir-prod": dict(
        name="MongoDB/mdbr-leaf-ir",
        revision="2e46f5aac796e621d51f678c306a66ede4712ecb",
        teacher="Snowflake/snowflake-arctic-embed-m-v1.5",
    ),
    "ir-paper": dict(
        name="MongoDB/mdbr-leaf-ir",
        revision="ea98995e96beac21b820aa8ad9afaa6fd29b243d",
        teacher="Snowflake/snowflake-arctic-embed-m-v1.5",
    ),
    "mt-prod": dict(
        name="MongoDB/mdbr-leaf-mt",
        revision="66c47ba6d753efc208d54412b5af6c744a39a4df",
        teacher="mixedbread-ai/mxbai-embed-large-v1",
    ),
    "mt-paper": dict(
        name="MongoDB/mdbr-leaf-mt",
        revision="c342f945a6855346bd5f48d5ee8b7e39120b0ce9",
        teacher="mixedbread-ai/mxbai-embed-large-v1",
    ),
}

In the cell below:
* set the output folder,
* select one of the models defined above, and
* choose the desired benchmark.

In [None]:
# Folder the evaluation results are written to.
# Alternative location:
# output_folder = "../../data/results/publish/"
output_folder = "/content/data/results/publish/"

# Active model / benchmark pairing.
benchmark_name = "BEIR"
model_selection = MODELS["ir-prod"]

# Alternative pairing:
# model_selection = MODELS['mt-prod']
# benchmark_name = "MTEB(eng, v2)"

### Run Evals

In [None]:
# Look up the benchmark selected in the configuration cell, then build the
# MTEB evaluation object over it; both names are reused by later cells.
benchmark = mteb.get_benchmark(benchmark_name)
evaluation = mteb.MTEB(tasks=benchmark)

In [None]:
# Load the selected model at its pinned revision.
model = SentenceTransformer(
    model_selection["name"],
    revision=model_selection["revision"],
)

# Alternative: load through the mteb model registry instead.
# meta = mteb.get_model_meta(
#     model_name=model_selection['name'],
#     revision=model_selection['revision']
# )
# model = meta.load_model()

In [None]:
%%time
results = evaluation.run(
 model=model,
 verbosity=1,
 output_folder=output_folder,
 overwrite_results=True,
)

Evaluate Quora

In [None]:
if model_selection["name"].endswith("ir"):
    # Quora is closer to a sentence similarity task than a retrieval one, as its
    # queries aren't proper user queries; we thus embed them without the typical
    # query prompt.
    quora_tasks = mteb.get_tasks(tasks=["QuoraRetrieval"])

    # Use Quora-specific names here: rebinding `evaluation` / `results` would make
    # every later cell that calls `evaluation.run(...)` silently evaluate
    # QuoraRetrieval only instead of the selected benchmark.
    quora_evaluation = mteb.MTEB(tasks=quora_tasks)

    # Clear the prompts only for the duration of this run and put them back
    # afterwards, so that re-running other cells with `model` is unaffected.
    saved_prompts = model.prompts
    model.prompts = {}
    try:
        quora_results = quora_evaluation.run(
            model=model,
            verbosity=1,
            output_folder=output_folder,
            overwrite_results=True,
        )
    finally:
        model.prompts = saved_prompts

### Asymmetric Mode

Compute asymmetric mode scores: queries encoded by `leaf`, documents by the original teacher model.

In [None]:
class AsymmetricModel:
    """Encoder that dispatches to one of two models depending on the prompt type.

    Queries are encoded by ``query_model`` with its ``"query"`` prompt, documents
    by ``doc_model``. Calls that carry no prompt type fall back to
    ``query_model`` without a prompt name.
    """

    def __init__(
        self,
        doc_model: SentenceTransformer,
        query_model: SentenceTransformer,
    ) -> None:
        """Keep references to the document-side and query-side encoders."""
        self.query_model = query_model
        self.doc_model = doc_model

    def encode(self, sentences: List[str], **kwargs) -> np.ndarray | torch.Tensor:
        """Embed ``sentences`` with the encoder selected by ``prompt_type``.

        Args:
            sentences: Texts to embed.
            **kwargs: Forwarded to the selected model's ``encode``. The
                ``prompt_type`` entry is read from here and is set to ``None``
                when the caller did not supply it.

        Returns:
            The embeddings as a ``torch.Tensor`` moved to the CPU.

        Raises:
            ValueError: If ``prompt_type`` is not the query type, the document
                type, or ``None``.
        """
        # Ensure the key exists so it is always forwarded to the underlying encoder.
        prompt_type = kwargs.setdefault("prompt_type", None)

        if prompt_type == PromptType.query:
            embeddings = self.query_model.encode(sentences, prompt_name="query", **kwargs)
        elif prompt_type == PromptType.document:
            embeddings = self.doc_model.encode(sentences, **kwargs)
        elif prompt_type is None:
            print("No prompt type: using query (leaf) model for encoding")
            embeddings = self.query_model.encode(sentences, **kwargs)
        else:
            raise ValueError(f"Encoding unknown type: {kwargs['prompt_type']}")

        # Normalise the output to a CPU tensor whichever form the encoder returned.
        if isinstance(embeddings, torch.Tensor):
            return embeddings.to("cpu")
        return torch.from_numpy(embeddings).to("cpu")

In [None]:
# Leaf model at its pinned revision; it encodes the queries.
leaf = SentenceTransformer(
    model_selection["name"],
    revision=model_selection["revision"],
)
# Teacher model (no revision pinned); it encodes the documents.
teacher = SentenceTransformer(model_selection["teacher"])

asymm_model = AsymmetricModel(doc_model=teacher, query_model=leaf)

In [None]:
%%time
results = evaluation.run(
 model=asymm_model,
 verbosity=1,
 output_folder=output_folder,
 overwrite_results=True,
)