{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "2a12a2b3", "metadata": {}, "outputs": [], "source": [ "from safetensors import safe_open\n", "import torch\n", "from torch.nn import functional as F\n", "from transformers import AutoModel, AutoTokenizer" ] }, { "cell_type": "code", "execution_count": null, "id": "148ce181", "metadata": {}, "outputs": [], "source": [ "# First clone the model locally\n", "!git clone https://huggingface.co/MongoDB/mdbr-leaf-mt" ] }, { "cell_type": "code", "execution_count": 2, "id": "ba9ec6c7", "metadata": {}, "outputs": [], "source": [ "# Then load it\n", "MODEL = \"mdbr-leaf-mt\"\n", "\n", "tokenizer = AutoTokenizer.from_pretrained(MODEL)\n", "model = AutoModel.from_pretrained(MODEL, add_pooling_layer=False)" ] }, { "cell_type": "code", "execution_count": 3, "id": "ebaf1a76", "metadata": {}, "outputs": [], "source": [ "tensors = {}\n", "with safe_open(MODEL + \"/2_Dense/model.safetensors\", framework=\"pt\") as f:\n", " for k in f.keys():\n", " tensors[k] = f.get_tensor(k)" ] }, { "cell_type": "code", "execution_count": null, "id": "03ffcd9c", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Similarities:\n", "tensor([[0.9063, 0.7287],\n", " [0.6725, 0.8287]])\n" ] } ], "source": [ "if 'linear.bias' in tensors:\n", " W_out = torch.nn.Linear(in_features=384, out_features=1024, bias=True)\n", " W_out.load_state_dict({\n", " \"weight\": tensors[\"linear.weight\"], \n", " \"bias\": tensors[\"linear.bias\"]\n", " })\n", "else:\n", " W_out = torch.nn.Linear(in_features=384, out_features=1024, bias=False)\n", " W_out.load_state_dict({\n", " \"weight\": tensors[\"linear.weight\"]\n", " })\n", "\n", "_ = model.eval()\n", "_ = W_out.eval()\n", "\n", "# Example queries and documents \n", "queries = [\n", " \"What is machine learning?\", \n", " \"How does neural network training work?\" \n", "] \n", " \n", "documents = [ \n", " \"Machine learning is a subset of artificial intelligence that focuses on algorithms that can learn from data.\", \n", " \"Neural networks are trained through backpropagation, adjusting weights to minimize prediction errors.\" \n", "]\n", "\n", "# Tokenize\n", "QUERY_PREFIX = 'Represent this sentence for searching relevant passages: '\n", "queries_with_prefix = [QUERY_PREFIX + query for query in queries]\n", "\n", "query_tokens = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=512)\n", "document_tokens = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=512)\n", "\n", "# Perform Inference\n", "with torch.inference_mode():\n", " y_queries = model(**query_tokens).last_hidden_state\n", " y_docs = model(**document_tokens).last_hidden_state\n", "\n", " # perform pooling\n", " y_queries = y_queries * query_tokens.attention_mask.unsqueeze(-1)\n", " y_queries_pooled = y_queries.sum(dim=1) / query_tokens.attention_mask.sum(dim=1, keepdim=True)\n", "\n", " y_docs = y_docs * document_tokens.attention_mask.unsqueeze(-1)\n", " y_docs_pooled = y_docs.sum(dim=1) / document_tokens.attention_mask.sum(dim=1, keepdim=True)\n", "\n", " # map to desired output dimension\n", " query_embeddings = W_out(y_queries_pooled)\n", " document_embeddings = W_out(y_docs_pooled)\n", "\n", "similarities = F.cosine_similarity(query_embeddings.unsqueeze(0), document_embeddings.unsqueeze(1), dim=-1).T\n", "print(f\"Similarities:\\n{similarities}\")\n", "\n", "# Similarities:\n", "# tensor([[0.9063, 0.7287],\n", "# [0.6725, 0.8287]])" ] 
}, { "cell_type": "code", "execution_count": null, "id": "5a2b0244", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "alexis", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.7" } }, "nbformat": 4, "nbformat_minor": 5 }