---
license: apache-2.0
library_name: transformers.js
language:
- en
pipeline_tag: sentence-similarity
base_model:
- Qdrant/all_miniLM_L6_v2_with_attentions
- sentence-transformers/all-MiniLM-L6-v2
---

ONNX port of [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) adjusted to return attention weights. This model is intended to be used for [BM42 searches](https://qdrant.tech/articles/bm42/).

> Fixes an issue with the [Qdrant version](https://huggingface.co/Qdrant/all_miniLM_L6_v2_with_attentions) not shipping an `onnx` folder, which prevents Transformers.js from loading it.

### Usage

> Note:
> This model is meant to be used with Qdrant. Vectors have to be configured with [Modifier.IDF](https://qdrant.tech/documentation/concepts/indexing/?q=modifier#idf-modifier).

```typescript
import {
  AutoTokenizer,
  AutoModel,
  Tensor,
  TokenizerModel,
} from '@xenova/transformers';

const documents = [
  'You should stay, study and sprint.',
  'History can only prepare us to be surprised yet again.',
];

const MODEL_ID = 'bradynapier/all_miniLM_L6_v2_with_attentions_onnx';

const tokenizer = await AutoTokenizer.from_pretrained(MODEL_ID, {
  revision: 'main',
});

// This exposes some useful utils that the Python tokenizer provides ...
const tokenizerModel = TokenizerModel.fromConfig(tokenizer.model.config);

const model = await AutoModel.from_pretrained(MODEL_ID, {
  quantized: false,
  revision: 'main',
});
// The published types are wildly incorrect, but this should get you what you need!
```

#### Rough Outline of Getting Attentions

> This may not be the best way, but the documentation is truly lacking and this does the job :-P

```typescript
/**
 * Minimal attention tensor shape we rely on.
 * Only `dims` and `data` are used (dims = [B=1, H, T, T]).
 */
type XtTensor = { dims: number[]; data: ArrayLike<number> };

/**
 * Collect attentions across layers from a model.forward(...) output.
 *
 * ⚠️ Transformers.js variation:
 * - Some builds return `{ attentions: Tensor[] }`.
 * - Others return a dict with `attention_1`, `attention_2`, ... per layer.
 *
 * @internal
 * @param out Raw dictionary from `model.forward(...)`.
 * @returns Array of attention tensors (one per layer) with dims `[1, H, T, T]`.
 */
function collectAttentions(out: Record<string, unknown>): XtTensor[] {
  // Prefer array form if present (runtime feature; TS types don't guarantee it).
  const anyOut = out as unknown as { attentions?: XtTensor[] };
  if (Array.isArray(anyOut.attentions)) return anyOut.attentions;

  // Otherwise gather attention_1..attention_N and sort numerically by suffix.
  const keys = Object.keys(out)
    .filter((k) => /^attention_\d+$/i.test(k))
    .sort(
      (a, b) => parseInt(a.split('_')[1], 10) - parseInt(b.split('_')[1], 10),
    );

  return keys.map((k) => out[k] as unknown as XtTensor);
}

/** Build an all-ones int64 attention mask of shape [1, n]. */
function onesMask(n: number): Tensor {
  const data = BigInt64Array.from({ length: n }, () => 1n);
  return new Tensor('int64', data, [1, n]);
}

/**
 * Tokenization:
 * Prefer the public callable form `tokenizer(text, {...})`, which returns tensors.
 * In case your wrapper only exposes `_call` (private-ish), we fall back to it here.
 * The return includes `input_ids` and `attention_mask` tensors.
 */
const text = documents[0]; // process one document at a time

const enc =
  typeof tokenizer === 'function'
    ? // eslint-disable-next-line @typescript-eslint/await-thenable
      await (tokenizer as unknown as (
        input: string,
        options: { add_special_tokens: boolean },
      ) => { input_ids: Tensor; attention_mask: Tensor })(text, {
        add_special_tokens: true,
      })
    : tokenizer._call(text, { add_special_tokens: true }); // <-- documented hack
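
// --- Illustrative helper (an assumption, not part of the original outline) --
// The forward-pass notes below point out that `Tensor` has no `.get(...)`, so
// attention weights must be read via flat buffer indexing with `dims`. A
// minimal sketch of that arithmetic for a row-major buffer with
// dims = [1, H, T, T]:
function attentionAt(t: XtTensor, head: number, query: number, key: number): number {
  const T = t.dims[3]; // sequence length (last axis)
  // Flat offset of element [0, head, query, key] = ((0*H + head)*T + query)*T + key
  return Number(t.data[(head * T + query) * T + key]);
}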

// Convert tensor buffers (may be BigInt) → number[] for downstream processing.
const input_ids = Array.from(
  (enc.input_ids as Tensor).data as ArrayLike<number | bigint>,
).map(Number);

/**
 * Forward pass with attentions.
 *
 * Another "crazy" bit: different Transformers.js builds expose attentions
 * differently. We:
 * - accept `{ attentions: Tensor[] }`, or
 * - collect `attention_1, attention_2, ...` and sort them.
 * Also, `Tensor` has no `.get(...)`, so we do **flat buffer indexing** with `dims`.
 */
// `forward` expects tensors, not plain arrays, so rebuild an int64 tensor.
const out = (await model.forward({
  input_ids: new Tensor(
    'int64',
    BigInt64Array.from(input_ids, BigInt),
    [1, input_ids.length],
  ),
  attention_mask: onesMask(input_ids.length),
  output_attentions: true,
})) as unknown as Record<string, unknown>;

const attentions = collectAttentions(out);
```
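
#### Aggregating Attentions into BM42 Weights

The [BM42 article](https://qdrant.tech/articles/bm42/) derives sparse weights from the attention row of the `[CLS]` token. The sketch below follows that idea under a few assumptions: it uses only the last layer, averages across heads, keeps special tokens, and sums weights for repeated token ids. `clsAttentionWeights` is an illustrative helper (not an API) and reuses `XtTensor`, `attentions`, and `input_ids` from the snippets above.

```typescript
/**
 * Illustrative sketch (assumption): average the `[CLS]` attention row of the
 * last layer across heads, pairing each weight with its token id.
 */
function clsAttentionWeights(
  attentions: XtTensor[],
  input_ids: number[],
): Map<number, number> {
  const last = attentions[attentions.length - 1]; // dims = [1, H, T, T]
  const [, H, T] = last.dims;
  const weights = new Map<number, number>();

  for (let k = 0; k < T; k++) {
    // Attention from [CLS] (query index 0) to token k, averaged over heads:
    // flat offset of element [0, h, 0, k] is h*T*T + k.
    let sum = 0;
    for (let h = 0; h < H; h++) {
      sum += Number(last.data[h * T * T + k]);
    }
    // Accumulate so repeated token ids add up.
    weights.set(input_ids[k], (weights.get(input_ids[k]) ?? 0) + sum / H);
  }

  return weights;
}

const weights = clsAttentionWeights(attentions, input_ids);
```

In practice the article describes further steps, such as dropping `[CLS]`/`[SEP]` and reassembling subword pieces into whole words, before the weights are indexed.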
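
#### Configuring Qdrant with Modifier.IDF

To act on the note at the top of this card, here is a hedged sketch of creating a collection whose sparse vectors use the IDF modifier, via the official `@qdrant/js-client-rest` client. The URL, collection name, and sparse vector name (`bm42`) are placeholders; `weights` is the map produced above.

```typescript
import { QdrantClient } from '@qdrant/js-client-rest';

const client = new QdrantClient({ url: 'http://localhost:6333' });

// Sparse vectors configured with Modifier.IDF so Qdrant rescales weights by IDF.
await client.createCollection('my-collection', {
  sparse_vectors: {
    bm42: {
      modifier: 'idf',
    },
  },
});

// Upsert one document using the token-id → weight map computed above.
await client.upsert('my-collection', {
  points: [
    {
      id: 1,
      payload: { text: documents[0] },
      vector: {
        bm42: {
          indices: [...weights.keys()],
          values: [...weights.values()],
        },
      },
    },
  ],
});
```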