---
license: apache-2.0
library_name: transformers.js
language:
- en
pipeline_tag: sentence-similarity
base_model:
- Qdrant/all_miniLM_L6_v2_with_attentions
- sentence-transformers/all-MiniLM-L6-v2
---

ONNX port of [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2), adjusted to return attention weights.

This model is intended to be used for [BM42 searches](https://qdrant.tech/articles/bm42/).

> Fixes an issue with the [Qdrant version](https://huggingface.co/Qdrant/all_miniLM_L6_v2_with_attentions), which does not ship an `onnx` folder and therefore can't be loaded by Transformers.js.

### Usage

> Note:
> This model is meant to be used with Qdrant. Vectors have to be configured with [Modifier.IDF](https://qdrant.tech/documentation/concepts/indexing/?q=modifier#idf-modifier); a collection-setup sketch follows.
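
The snippet below is a minimal sketch (not part of this repo) of what that collection setup can look like with the official `@qdrant/js-client-rest` client. It assumes Qdrant ≥ 1.10 (which introduced the IDF modifier) running locally; the collection name `bm42_demo` and the sparse-vector name `bm42` are placeholders.

```typescript
import { QdrantClient } from '@qdrant/js-client-rest';

// Adjust the URL for your deployment; this assumes a local Qdrant instance.
const client = new QdrantClient({ url: 'http://localhost:6333' });

// The sparse vectors that will carry the attention-derived BM42 weights must
// use the IDF modifier so Qdrant folds its own IDF statistics into scoring.
await client.createCollection('bm42_demo', {
  sparse_vectors: {
    bm42: {
      modifier: 'idf',
    },
  },
});
```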

```typescript
import { AutoTokenizer, AutoModel, TokenizerModel, Tensor } from '@xenova/transformers';

const documents = [
  "You should stay, study and sprint.",
  "History can only prepare us to be surprised yet again.",
];

const MODEL_ID = "bradynapier/all_miniLM_L6_v2_with_attentions_onnx";

const tokenizer = await AutoTokenizer.from_pretrained(MODEL_ID, {
  revision: 'main',
});

// TokenizerModel exposes some useful utilities that the Python tokenizers have built in.
const tokenizerModel = TokenizerModel.fromConfig(tokenizer.model.config);

const model = await AutoModel.from_pretrained(MODEL_ID, {
  quantized: false, // load the full-precision ONNX weights
  revision: 'main',
});

// The published types are wildly incorrect, but this should get you what you need!
```

#### Rough Outline of Getting Attentions

> This may not be the best way, but the documentation is truly lacking and this does the job :-P

```typescript
/**
 * Minimal attention tensor shape we rely on.
 * Only `dims` and `data` are used (dims = [B=1, H, T, T]).
 */
type XtTensor = { dims: number[]; data: ArrayLike<number | bigint> };

/**
 * Collect attentions across layers from a `model.forward(...)` output.
 *
 * ⚠️ Transformers.js variation:
 * - Some builds return `{ attentions: Tensor[] }`.
 * - Others return a dict with `attention_1`, `attention_2`, ... per layer.
 *
 * @internal
 * @param out Raw dictionary from `model.forward(...)`.
 * @returns Array of attention tensors (one per layer) with dims `[1, H, T, T]`.
 */
function collectAttentions(out: Record<string, Tensor>): XtTensor[] {
  // Prefer the array form if present (runtime feature; the TS types don't guarantee it).
  const anyOut = out as unknown as { attentions?: XtTensor[] };
  if (Array.isArray(anyOut.attentions)) return anyOut.attentions;

  // Otherwise gather attention_1..attention_N and sort numerically by suffix.
  const keys = Object.keys(out)
    .filter((k) => /^attention_\d+$/i.test(k))
    .sort(
      (a, b) => parseInt(a.split('_')[1], 10) - parseInt(b.split('_')[1], 10),
    );

  return keys.map((k) => out[k] as unknown as XtTensor);
}

/** Build an all-ones attention mask of length `n` as an int64 tensor. */
function onesMask(n: number): Tensor {
  const data = BigInt64Array.from({ length: n }, () => 1n);
  return new Tensor('int64', data, [1, n]);
}

/**
 * Tokenization:
 * Prefer the public callable form `tokenizer(text, {...})`, which returns tensors.
 * If your wrapper only exposes the (private-ish) `_call`, fall back to it.
 * The result includes `input_ids` and `attention_mask` tensors.
 */
const text = documents[0]; // any of the documents from the first snippet
const enc =
  typeof tokenizer === 'function' ?
    // eslint-disable-next-line @typescript-eslint/await-thenable
    await (tokenizer as unknown as typeof tokenizer._call)(text, {
      add_special_tokens: true,
    })
    : tokenizer._call(text, { add_special_tokens: true }); // <-- documented hack

// Convert tensor buffers (may be BigInt) → number[] for downstream processing.
const input_ids = Array.from(
  (enc.input_ids as Tensor).data as ArrayLike<number | bigint>,
).map(Number);

/**
 * Forward pass with attentions.
 *
 * Another "crazy" bit: different Transformers.js builds expose attentions differently. We:
 * - accept `{ attentions: Tensor[] }`, or
 * - collect `attention_1, attention_2, ...` and sort them.
 * Also, `Tensor` has no `.get(...)`, so we do **flat buffer indexing** with `dims`.
 */
const out = (await model.forward({
  input_ids: enc.input_ids, // pass the tensor itself, not the number[] copy
  attention_mask: enc.attention_mask ?? onesMask(input_ids.length),
  output_attentions: true,
})) as unknown as Record<string, Tensor>;

const attentions = collectAttentions(out);
```
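
To turn these attentions into something you can store in a Qdrant sparse vector, BM42 looks at how much attention the `[CLS]` token pays to every other token. What follows is a rough sketch under that assumption (last layer only, averaged over heads, keyed by token id); it is illustrative rather than the exact recipe from the BM42 article, and the names `clsWeights` and `sparse` are placeholders.

```typescript
// Continue from the snippet above. The last layer's attention has dims [1, H, T, T];
// element [0, h, 0, t] (the CLS row) lives at flat index h * T * T + t.
const last = attentions[attentions.length - 1];
const [, numHeads, seqLen] = last.dims;

const clsWeights = new Array<number>(seqLen).fill(0);
for (let h = 0; h < numHeads; h++) {
  for (let t = 0; t < seqLen; t++) {
    clsWeights[t] += Number(last.data[h * seqLen * seqLen + t]);
  }
}

// Average over heads and build a sparse (token id -> weight) map, skipping the
// special tokens at position 0 ([CLS]) and the final position ([SEP]).
const sparse: Record<number, number> = {};
for (let t = 1; t < seqLen - 1; t++) {
  const id = input_ids[t];
  sparse[id] = (sparse[id] ?? 0) + clsWeights[t] / numHeads;
}

console.log(sparse); // token id -> attention-derived weight for the IDF-modified sparse vector
```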