{
 "cells": [
  {
   "cell_type": "code",
   "id": "initial_id",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "import os\n",
    "\n",
    "# Colab detection: Colab environments export GOOGLE_CLOUD_PROJECT.\n",
    "IS_COLAB = 'GOOGLE_CLOUD_PROJECT' in os.environ\n",
    "if IS_COLAB:\n",
    "    # this needs to run before all other imports\n",
    "    os.environ['HF_HOME'] = '/content/cache/'  # to avoid running out of disk space\n",
    "\n",
    "import mteb\n",
    "from sentence_transformers import SentenceTransformer"
   ],
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "# Hugging Face model name + pinned revision for each checkpoint under evaluation.\n",
    "MODELS = {\n",
    "    'ir-prod': {\n",
    "        'name': 'MongoDB/mdbr-leaf-ir',\n",
    "        'revision': '2e46f5aac796e621d51f678c306a66ede4712ecb'\n",
    "    },\n",
    "    'ir-paper': {\n",
    "        'name': 'MongoDB/mdbr-leaf-ir',\n",
    "        'revision': 'ea98995e96beac21b820aa8ad9afaa6fd29b243d'\n",
    "    },\n",
    "    'mt-prod': {\n",
    "        'name': 'MongoDB/mdbr-leaf-mt',\n",
    "        'revision': '66c47ba6d753efc208d54412b5af6c744a39a4df'\n",
    "    },\n",
    "    'mt-paper': {\n",
    "        'name': 'MongoDB/mdbr-leaf-mt',\n",
    "        'revision': 'c342f945a6855346bd5f48d5ee8b7e39120b0ce9'\n",
    "    }\n",
    "}"
   ],
   "id": "f0189ff1e7814a5a",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": [
    "**Notebook configuration**:\n",
    "* set the output folder\n",
    "* select one of the models defined above\n",
    "* choose the desired benchmark"
   ],
   "id": "371c6122efdf476a"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "output_folder = \"../../data/results/publish/\"\n",
    "\n",
    "model_selection = MODELS['ir-prod']\n",
    "benchmark_name = \"BEIR\"\n",
    "\n",
    "# model_selection = MODELS['mt-prod']\n",
    "# benchmark_name = \"MTEB(eng, v2)\""
   ],
   "id": "58d52a330febb9ac",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": "Load the model and run the evals",
   "id": "1b4367afc1278e"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "# Load the selected checkpoint at its pinned revision.\n",
    "model = SentenceTransformer(\n",
    "    model_selection['name'],\n",
    "    revision=model_selection['revision']\n",
    ")\n",
    "\n",
    "# alternative:\n",
    "# meta = mteb.get_model_meta(\n",
    "#     model_name=model_selection['name'],\n",
    "#     revision=model_selection['revision']\n",
    "# )\n",
    "# model = meta.load_model()"
   ],
   "id": "d6f13945a94f7a85",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "benchmark = mteb.get_benchmark(benchmark_name)\n",
    "evaluation = mteb.MTEB(tasks=benchmark)"
   ],
   "id": "c716c6344f9cd939",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "%%time\n",
    "# Full benchmark run; results are written under output_folder,\n",
    "# overwriting any previous results for the same model/revision.\n",
    "results = evaluation.run(\n",
    "    model=model,\n",
    "    verbosity=1,\n",
    "    output_folder=output_folder,\n",
    "    overwrite_results=True,\n",
    ")"
   ],
   "id": "9bd44e88fc360663",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": "Evaluate Quora",
   "id": "733e52ca41cf92a7"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "if model_selection['name'].endswith('ir'):\n",
    "    # quora is closer to a sentence similarity task than a retrieval one, as queries aren't proper user queries\n",
    "    # we thus embed them without the typical query prompt\n",
    "    model.prompts = {}\n",
    "    tasks = mteb.get_tasks(tasks=[\n",
    "        \"QuoraRetrieval\",\n",
    "    ])\n",
    "\n",
    "    evaluation = mteb.MTEB(tasks=tasks)\n",
    "    results = evaluation.run(\n",
    "        model=model,\n",
    "        verbosity=1,\n",
    "        output_folder=output_folder,\n",
    "        overwrite_results=True,\n",
    "    )"
   ],
   "id": "61aea9a04468202f",
   "outputs": [],
   "execution_count": null
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}