In [None]:
import os

# Colab detection: treated as Colab when GOOGLE_CLOUD_PROJECT is present in the environment.
# NOTE(review): this is a heuristic — the variable may also be set on other GCP hosts; confirm.
IS_COLAB = 'GOOGLE_CLOUD_PROJECT' in os.environ
if IS_COLAB:
    # this needs to run before all other imports
    os.environ['HF_HOME'] = '/content/cache/'  # to avoid running out of disk space

import mteb
from sentence_transformers import SentenceTransformer

In [None]:
# Registry of selectable checkpoints. Each entry carries the model id ('name') and the
# revision identifier ('revision') that the model-loading cell reads.
# Keys follow '<family>-<variant>' (families: ir, mt; variants: prod, paper).
MODELS = {
    key: {'name': name, 'revision': revision}
    for key, name, revision in (
        ('ir-prod', 'MongoDB/mdbr-leaf-ir', '2e46f5aac796e621d51f678c306a66ede4712ecb'),
        ('ir-paper', 'MongoDB/mdbr-leaf-ir', 'ea98995e96beac21b820aa8ad9afaa6fd29b243d'),
        ('mt-prod', 'MongoDB/mdbr-leaf-mt', '66c47ba6d753efc208d54412b5af6c744a39a4df'),
        ('mt-paper', 'MongoDB/mdbr-leaf-mt', 'c342f945a6855346bd5f48d5ee8b7e39120b0ce9'),
    )
}

**Notebook configuration**:
* set the output folder,
* select one of the models defined above, and
* choose the desired benchmark.

In [None]:
# Directory handed to the evaluation runs as their `output_folder`.
output_folder = "../../data/results/publish/"

# Active configuration: the MODELS entry to load and the benchmark to look up by name.
model_selection = MODELS['ir-prod']
benchmark_name = "BEIR"

# Alternative configuration (swap with the two assignments above to evaluate the MT model):
# model_selection = MODELS['mt-prod']
# benchmark_name = "MTEB(eng, v2)"

Load the model and run the evals

In [None]:
# Instantiate the selected checkpoint at the revision given in the configuration.
model = SentenceTransformer(model_selection['name'], revision=model_selection['revision'])

# Alternative loading path, going through mteb's model metadata instead:
# meta = mteb.get_model_meta(
#     model_name=model_selection['name'],
#     revision=model_selection['revision']
# )
# model = meta.load_model()

In [None]:
# Look up the benchmark chosen in the configuration cell by its name.
benchmark = mteb.get_benchmark(benchmark_name)
# Build the evaluation runner, passing the benchmark as its task collection.
evaluation = mteb.MTEB(tasks=benchmark)

In [None]:
%%time
# Run the configured benchmark on the loaded model, using `output_folder` for the outputs.
# NOTE(review): overwrite_results=True presumably recomputes and replaces any results already
# present in that folder instead of reusing them — confirm against the mteb documentation.
results = evaluation.run(
    model=model,
    verbosity=1,
    output_folder=output_folder,
    overwrite_results=True,
)

Evaluate Quora

In [None]:
# The extra Quora pass only applies to models whose name ends in 'ir'.
if model_selection['name'].endswith('ir'):
    # quora is closer to a sentence similarity task than a retrieval one, as queries aren't
    # proper user queries; we thus embed them without the typical query prompt
    original_prompts = model.prompts
    model.prompts = {}
    try:
        tasks = mteb.get_tasks(tasks=[
            "QuoraRetrieval",
        ])

        evaluation = mteb.MTEB(tasks=tasks)
        results = evaluation.run(
            model=model,
            verbosity=1,
            output_folder=output_folder,
            overwrite_results=True,
        )
    finally:
        # Put the prompts back so this cell is re-runnable and so that re-running any other
        # evaluation cell afterwards does not silently embed queries without prompts.
        model.prompts = original_prompts