---
license: apache-2.0
library_name: transformers.js
language:
- en
pipeline_tag: sentence-similarity
base_model:
- Qdrant/all_miniLM_L6_v2_with_attentions
- sentence-transformers/all-MiniLM-L6-v2
---
ONNX port of [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) adjusted to return attention weights.
This model is intended to be used for [BM42 searches](https://qdrant.tech/articles/bm42/).
> Fixes an issue where the [Qdrant version](https://huggingface.co/Qdrant/all_miniLM_L6_v2_with_attentions) does not ship an `onnx` folder, so Transformers.js can't use it.
### Usage
> Note:
> This model is meant to be used with Qdrant. Vectors have to be configured with [Modifier.IDF](https://qdrant.tech/documentation/concepts/indexing/?q=modifier#idf-modifier); a sketch of such a collection follows.
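For context, a collection wired up for these sparse vectors might be created like this (a minimal sketch, assuming the `@qdrant/js-client-rest` client, where `Modifier.IDF` is passed as the string `'idf'`; the collection and vector names are placeholders):
```typescript
import { QdrantClient } from '@qdrant/js-client-rest';

const client = new QdrantClient({ url: 'http://localhost:6333' });

// 'bm42' is a placeholder sparse-vector name; `modifier: 'idf'` tells
// Qdrant to apply the IDF weighting that BM42 depends on.
await client.createCollection('my_documents', {
  sparse_vectors: {
    bm42: {
      modifier: 'idf',
    },
  },
});
```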
```typescript
import { AutoTokenizer, AutoModel, TokenizerModel } from '@xenova/transformers';

const documents = [
  'You should stay, study and sprint.',
  'History can only prepare us to be surprised yet again.',
];

const MODEL_ID = 'bradynapier/all_miniLM_L6_v2_with_attentions_onnx';

const tokenizer = await AutoTokenizer.from_pretrained(MODEL_ID, {
  revision: 'main',
});

// This exposes some useful utilities that the Python `transformers`
// library keeps on its tokenizers...
const tokenizerModel = TokenizerModel.fromConfig(tokenizer.model.config);

const model = await AutoModel.from_pretrained(MODEL_ID, {
  quantized: false,
  revision: 'main',
});

// The published types are wildly incorrect... but this should get you
// what you need!
```
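With the tokenizer and model loaded, pick a document to encode. This small bridging snippet (the `text` variable is reused in the outline below) also shows that the tokenizer instance is directly callable:
```typescript
// Encode one document at a time; the outline below operates on `text`.
const text = documents[0];

// The tokenizer instance is callable and returns `input_ids` and
// `attention_mask` tensors of shape [1, T].
const encoding = await tokenizer(text, { add_special_tokens: true });
console.log(encoding.input_ids.dims); // e.g. [1, 10]
```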
#### Rough Outline of Getting Attentions
> This may not be the best way, but the documentation is truly lacking and this does the job :-P
```typescript
import { Tensor } from '@xenova/transformers';

/**
 * Minimal attention tensor shape we rely on.
 * Only `dims` and `data` are used (dims = [B=1, H, T, T]).
 */
type XtTensor = { dims: number[]; data: ArrayLike<number | bigint> };
/**
* Collect attentions across layers from a model.forward(...) output.
*
* ⚠️ Transformers.js variation:
* - Some builds return `{ attentions: Tensor[] }`.
* - Others return a dict with `attention_1`, `attention_2`, ... per layer.
*
* @internal
* @param out Raw dictionary from `model.forward(...)`.
* @returns Array of attention tensors (one per layer) with dims `[1, H, T, T]`.
*/
function collectAttentions(out: Record<string, Tensor>): XtTensor[] {
// Prefer array form if present (runtime feature; TS types don’t guarantee it).
const anyOut = out as unknown as { attentions?: XtTensor[] };
if (Array.isArray(anyOut.attentions)) return anyOut.attentions;
// Otherwise gather attention_1..attention_N and sort numerically by suffix.
const keys = Object.keys(out)
.filter((k) => /^attention_\d+$/i.test(k))
.sort(
(a, b) => parseInt(a.split('_')[1], 10) - parseInt(b.split('_')[1], 10),
);
return keys.map((k) => out[k] as unknown as XtTensor);
}
/**
 * Build an all-ones attention mask of length `n`.
 * ONNX expects int64 here, which in JS means a BigInt64Array.
 */
function onesMask(n: number): Tensor {
  const data = BigInt64Array.from({ length: n }, () => 1n);
  return new Tensor('int64', data, [1, n]);
}
/**
* Tokenization:
* Prefer the public callable form `tokenizer(text, {...})` which returns tensors.
* In case your wrapper only exposes a `_call` (private-ish) we fall back to it here.
* The return includes `input_ids` and `attention_mask` tensors.
*/
// At runtime Transformers.js tokenizers extend `Callable`, so the instance
// itself is a function; the cast only exists to satisfy TypeScript.
const callableTokenizer = tokenizer as unknown as typeof tokenizer._call;
const enc =
  typeof tokenizer === 'function'
    ? // eslint-disable-next-line @typescript-eslint/await-thenable
      await callableTokenizer(text, { add_special_tokens: true })
    : tokenizer._call(text, { add_special_tokens: true }); // <-- documented hack

// Convert tensor buffers (may be BigInt) → number[] for downstream processing.
const input_ids = Array.from(
  (enc.input_ids as Tensor).data as ArrayLike<number | bigint>,
).map(Number);
/**
* Forward pass with attentions.
*
* Another "crazy" bit: different Transformers.js builds expose attentions differently. We:
* - accept `{ attentions: Tensor[] }`, or
* - collect `attention_1, attention_2, ...` and sort them.
* Also, `Tensor` has no `.get(...)` so we do **flat buffer indexing** with `dims`.
*/
const out = (await model.forward({
  // `forward` expects Tensors, so pass the encoded tensor straight through
  // rather than the number[] copy above.
  input_ids: enc.input_ids as Tensor,
  attention_mask: onesMask(input_ids.length),
  output_attentions: true,
})) as unknown as Record<string, Tensor>;

const attentions = collectAttentions(out);
```
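From here, BM42 derives one weight per token from how much attention the `[CLS]` token pays to it. Below is a minimal sketch of that step, assuming the last layer's attentions with heads averaged (roughly the approach the BM42 article describes), and using flat buffer indexing since `Tensor` has no `.get(...)`:
```typescript
/**
 * Average, across heads, the attention row of the [CLS] token (index 0).
 * dims = [1, H, T, T], so element [0, h, 0, t] sits at flat index h*T*T + t.
 */
function clsAttentionWeights(att: XtTensor): number[] {
  const [, H, T] = att.dims;
  const weights = new Array<number>(T).fill(0);
  for (let h = 0; h < H; h++) {
    for (let t = 0; t < T; t++) {
      weights[t] += Number(att.data[h * T * T + t]);
    }
  }
  return weights.map((w) => w / H);
}

// One weight per input token; pair these with `input_ids` to build the
// sparse vector you upsert into Qdrant.
const tokenWeights = clsAttentionWeights(attentions[attentions.length - 1]);
```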