Upload optimized ONNX model (#1)
Browse files
- Upload optimized ONNX model (3cd35e658a49354e0dca26f8d45f7b04e8d09c74)
- Upload 4 files (e1572ef50cc267d1886924d15f4b3452eacf985e)
- Update README.md (583f336ed28b809e962174cd047aaf28885672fc)
- Upload README.md (8b6086b046837c3cc9567016ee1990692338b50a)
Co-authored-by: Joshua <[email protected]>
- README.md +2 -1
- onnx/model.onnx +1 -1
- onnx/model_fp16.onnx +2 -2
- onnx/model_q4.onnx +1 -1
- onnx/model_q4f16.onnx +2 -2
- onnx/model_quantized.onnx +1 -1
README.md
CHANGED
|
@@ -8,6 +8,7 @@ tags:
|
|
| 8 |
- text-embeddings-inference
|
| 9 |
- information-retrieval
|
| 10 |
- knowledge-distillation
|
|
|
|
| 11 |
language:
|
| 12 |
- en
|
| 13 |
---
|
|
@@ -136,7 +137,7 @@ import { AutoModel, AutoTokenizer, matmul } from "@huggingface/transformers";
|
|
| 136 |
const model_id = "MongoDB/mdbr-leaf-ir";
|
| 137 |
const tokenizer = await AutoTokenizer.from_pretrained(model_id);
|
| 138 |
const model = await AutoModel.from_pretrained(model_id, {
|
| 139 |
-
dtype: "fp32", // Options: "fp32" | "q8" | "q4"
|
| 140 |
});
|
| 141 |
|
| 142 |
// Prepare queries and documents
|
|
|
|
| 8 |
- text-embeddings-inference
|
| 9 |
- information-retrieval
|
| 10 |
- knowledge-distillation
|
| 11 |
+
- transformers.js
|
| 12 |
language:
|
| 13 |
- en
|
| 14 |
---
|
|
|
|
| 137 |
const model_id = "MongoDB/mdbr-leaf-ir";
|
| 138 |
const tokenizer = await AutoTokenizer.from_pretrained(model_id);
|
| 139 |
const model = await AutoModel.from_pretrained(model_id, {
|
| 140 |
+
dtype: "fp32", // Options: "fp32" | "fp16" | "q8" | "q4" | "q4f16"
|
| 141 |
});
|
| 142 |
|
| 143 |
// Prepare queries and documents
|
onnx/model.onnx
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 60894
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:71f676c934c5f39fd7521c7fa24f549a06dd9766ced99a243fc9e8237a077c8a
|
| 3 |
size 60894
|
onnx/model_fp16.onnx
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:374a7564087a64532647bf66a0295e2bfe9d6c6538c41b3be0bceaf54053a118
|
| 3 |
+
size 100962
|
onnx/model_q4.onnx
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 74071
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:277a147fb6ad8b91d0f2c593a1a33bf671894641192f9e8a03da8c330a98ad9d
|
| 3 |
size 74071
|
onnx/model_q4f16.onnx
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9d98d7a319f180c99a464c47c972cc52bb5316b5fd451fa479c8ee06147e05a6
|
| 3 |
+
size 113978
|
onnx/model_quantized.onnx
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 220597
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2f043808bdfe3bdf3a7f7e14345d52a2d0d13c220b8986813755844fd806f1ba
|
| 3 |
size 220597
|