Add new SparseEncoder model

Files changed:
- README.md (+103 −96)
- config.json (+1 −1)
- config_sentence_transformers.json (+1 −1)
- model.safetensors (+1 −1)
- special_tokens_map.json (+35 −5)
- tokenizer_config.json (+8 −1)
README.md
CHANGED

````diff
@@ -8,28 +8,30 @@ tags:
 - sparse
 - splade
 - generated_from_trainer
-- dataset_size:
+- dataset_size:800000
 - loss:SpladeLoss
 - loss:SparseMultipleNegativesRankingLoss
 - loss:FlopsLoss
 base_model: prajjwal1/bert-tiny
 widget:
-- text:
+- text: how much do private cleaners charge per hour
+- text: atlantic ocean air currents affects climate
+- text: RNA polymerase is the core enzyme in transcription which needs proteins known
+    as transcription factors to bind to the DNA promoter. also DNA plymerase... can't
+    remember what about it though. DNA polymerase isn't involved in DNA transcription.
+    However, DNA polymerase IS involved in DNA REPLICATION.
+- text: Exploit:JS/Axpergle.E Virus is a threatening Trojan horse which gets itself
+    loaded when you turn on your computer and eats up lots of system resources. Once
+    this Exploit:JS/Axpergle.E virus successfully enters your operating system, your
+    computer will be subjected to a variety of errors and drive you mad.) Exploit:JS/Axpergle.E
+    Virus corrupts the data and files saved on your computer hard drive terribly.
+    2) Exploit:JS/Axpergle.E Virus changes the registry entry to get itself launched
+    at system startup.
+- text: --No depreciation deduction shall be allowed under this section (and no depreciation
+    or amortization deduction shall be allowed under any other provision of this subtitle)
+    to the taxpayer for any term interest in property for any period during which
+    the remainder interest in such property is held (directly or indirectly) by a
+    related person.
 pipeline_tag: feature-extraction
 library_name: sentence-transformers
 metrics:
@@ -63,61 +65,61 @@ model-index:
       type: unknown
     metrics:
     - type: dot_accuracy@1
-      value: 0.
+      value: 0.457
       name: Dot Accuracy@1
     - type: dot_accuracy@3
-      value: 0.
+      value: 0.7572
       name: Dot Accuracy@3
     - type: dot_accuracy@5
-      value: 0.
+      value: 0.8574
       name: Dot Accuracy@5
     - type: dot_accuracy@10
-      value: 0.
+      value: 0.929
       name: Dot Accuracy@10
     - type: dot_precision@1
-      value: 0.
+      value: 0.457
       name: Dot Precision@1
     - type: dot_precision@3
-      value: 0.
+      value: 0.25906666666666667
       name: Dot Precision@3
     - type: dot_precision@5
-      value: 0.
+      value: 0.178
       name: Dot Precision@5
     - type: dot_precision@10
-      value: 0.
+      value: 0.09714
       name: Dot Precision@10
     - type: dot_recall@1
-      value: 0.
+      value: 0.44155
       name: Dot Recall@1
     - type: dot_recall@3
-      value: 0.
+      value: 0.7427833333333334
       name: Dot Recall@3
     - type: dot_recall@5
-      value: 0.
+      value: 0.8471666666666666
       name: Dot Recall@5
     - type: dot_recall@10
-      value: 0.
+      value: 0.9223
       name: Dot Recall@10
     - type: dot_ndcg@10
-      value: 0.
+      value: 0.6931598312411338
       name: Dot Ndcg@10
     - type: dot_mrr@10
-      value: 0.
+      value: 0.6234866666666686
       name: Dot Mrr@10
     - type: dot_map@100
-      value: 0.
+      value: 0.6191148055389254
       name: Dot Map@100
    - type: query_active_dims
-      value:
+      value: 21.215999603271484
       name: Query Active Dims
     - type: query_sparsity_ratio
-      value: 0.
+      value: 0.9993048948429568
       name: Query Sparsity Ratio
     - type: corpus_active_dims
-      value:
+      value: 159.5419082486014
       name: Corpus Active Dims
     - type: corpus_sparsity_ratio
-      value: 0.
+      value: 0.99477288813811
       name: Corpus Sparsity Ratio
 ---

@@ -167,15 +169,15 @@ Then you can load this model and run inference.

 from sentence_transformers import SparseEncoder

 # Download from the 🤗 Hub
-model = SparseEncoder("
+model = SparseEncoder("yosefw/SPLADE-BERT-Tiny-v2")
 # Run inference
 queries = [
-    "
+    "what code section is depreciation",
 ]
 documents = [
-    '
-    '
-    '
+    'Section 179 depreciation deduction. Section 179 of the United States Internal Revenue Code (26 U.S.C. § 179), allows a taxpayer to elect to deduct the cost of certain types of property on their income taxes as an expense, rather than requiring the cost of the property to be capitalized and depreciated.',
+    '--No depreciation deduction shall be allowed under this section (and no depreciation or amortization deduction shall be allowed under any other provision of this subtitle) to the taxpayer for any term interest in property for any period during which the remainder interest in such property is held (directly or indirectly) by a related person.',
+    'Depreciation - Amortization Code. Refer to the IRS Instructions for Form 4562, Line 42, for the amortization code.',
 ]
 query_embeddings = model.encode_query(queries)
 document_embeddings = model.encode_document(documents)
@@ -185,7 +187,7 @@ print(query_embeddings.shape, document_embeddings.shape)

 # Get the similarity scores for the embeddings
 similarities = model.similarity(query_embeddings, document_embeddings)
 print(similarities)
-# tensor([[
+# tensor([[17.0167, 11.4943, 13.8083]])
 ```
````
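The updated usage snippet returns vocabulary-sized sparse vectors. As an illustrative aside (not part of this commit), Sentence Transformers 5.x also exposes a `decode` helper on `SparseEncoder` that maps the non-zero dimensions back to vocabulary tokens, which is handy for sanity-checking what a SPLADE model actually activates:

```python
# Illustrative sketch, assuming the SparseEncoder.decode helper from
# sentence-transformers >= 5.0; not part of the committed model card.
from sentence_transformers import SparseEncoder

model = SparseEncoder("yosefw/SPLADE-BERT-Tiny-v2")
query_embeddings = model.encode_query(["what code section is depreciation"])

# decode() maps the non-zero dimensions of a sparse embedding back to
# (token, weight) pairs, highest weight first.
for token, weight in model.decode(query_embeddings[0], top_k=10):
    print(f"{token}\t{weight:.2f}")
```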
````diff
@@ -222,25 +224,25 @@ You can finetune this model on your own dataset.

 | Metric                | Value      |
 |:----------------------|:-----------|
-| dot_accuracy@1        | 0.
-| dot_accuracy@3        | 0.
-| dot_accuracy@5        | 0.
-| dot_accuracy@10       | 0.
-| dot_precision@1       | 0.
-| dot_precision@3       | 0.
-| dot_precision@5       | 0.
-| dot_precision@10      | 0.
-| dot_recall@1          | 0.
-| dot_recall@3          | 0.
-| dot_recall@5          | 0.
-| dot_recall@10         | 0.
-| **dot_ndcg@10**       | **0.
-| dot_mrr@10            | 0.
-| dot_map@100           | 0.
-| query_active_dims     |
-| query_sparsity_ratio  | 0.
-| corpus_active_dims    |
-| corpus_sparsity_ratio | 0.
+| dot_accuracy@1        | 0.457      |
+| dot_accuracy@3        | 0.7572     |
+| dot_accuracy@5        | 0.8574     |
+| dot_accuracy@10       | 0.929      |
+| dot_precision@1       | 0.457      |
+| dot_precision@3       | 0.2591     |
+| dot_precision@5       | 0.178      |
+| dot_precision@10      | 0.0971     |
+| dot_recall@1          | 0.4415     |
+| dot_recall@3          | 0.7428     |
+| dot_recall@5          | 0.8472     |
+| dot_recall@10         | 0.9223     |
+| **dot_ndcg@10**       | **0.6932** |
+| dot_mrr@10            | 0.6235     |
+| dot_map@100           | 0.6191     |
+| query_active_dims     | 21.216     |
+| query_sparsity_ratio  | 0.9993     |
+| corpus_active_dims    | 159.5419   |
+| corpus_sparsity_ratio | 0.9948     |

 <!--
 ## Bias, Risks and Limitations
````
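Metrics in this form are produced by the sparse information-retrieval evaluator in Sentence Transformers 5.x. A minimal sketch of how such numbers are computed (the two-document corpus and the `toy` name below are illustrative; the card's actual evaluation set is unnamed, `type: unknown`):

```python
# Sketch only: a stand-in corpus, since the real evaluation data is not named
# in the card. Assumes the SparseInformationRetrievalEvaluator class from
# sentence-transformers >= 5.0.
from sentence_transformers import SparseEncoder
from sentence_transformers.sparse_encoder.evaluation import (
    SparseInformationRetrievalEvaluator,
)

model = SparseEncoder("yosefw/SPLADE-BERT-Tiny-v2")

queries = {"q1": "what code section is depreciation"}
corpus = {
    "d1": "Section 179 of the United States Internal Revenue Code allows a "
          "taxpayer to elect to deduct the cost of certain types of property.",
    "d2": "Refer to the IRS Instructions for Form 4562, Line 42, for the "
          "amortization code.",
}
relevant_docs = {"q1": {"d1"}}

evaluator = SparseInformationRetrievalEvaluator(
    queries=queries, corpus=corpus, relevant_docs=relevant_docs, name="toy"
)
results = evaluator(model)
print(results)  # dot_accuracy@k, dot_ndcg@10, active dims / sparsity, ...
```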
````diff
@@ -260,25 +262,25 @@ You can finetune this model on your own dataset.

 #### Unnamed Dataset

-* Size:
+* Size: 800,000 training samples
-* Columns: <code>query</code>, <code>positive</code>, and <code>
+* Columns: <code>query</code>, <code>positive</code>, <code>negative_1</code>, and <code>negative_2</code>
 * Approximate statistics based on the first 1000 samples:
-  |         | query | positive |
-  | type    | string | string | string |
-  | details | <ul><li>min: 4 tokens</li><li>mean:
+  |         | query | positive | negative_1 | negative_2 |
+  |:--------|:------|:---------|:-----------|:-----------|
+  | type    | string | string | string | string |
+  | details | <ul><li>min: 4 tokens</li><li>mean: 9.03 tokens</li><li>max: 30 tokens</li></ul> | <ul><li>min: 15 tokens</li><li>mean: 81.92 tokens</li><li>max: 220 tokens</li></ul> | <ul><li>min: 22 tokens</li><li>mean: 78.63 tokens</li><li>max: 227 tokens</li></ul> | <ul><li>min: 18 tokens</li><li>mean: 78.11 tokens</li><li>max: 236 tokens</li></ul> |
 * Samples:
-  | query
-  | <code>
-  | <code>
-  | <code>
+  | query | positive | negative_1 | negative_2 |
+  |:------|:---------|:-----------|:-----------|
+  | <code>definition of vas deferens</code> | <code>Vas deferens: The tube that connects the testes with the urethra. The vas deferens is a coiled duct that conveys sperm from the epididymis to the ejaculatory duct and the urethra.</code> | <code>For further discussion of the vas deferens within the context of the structures and functions of reproduction and sexuality, please see the overview section “The Reproductive System.”. See also FERTILITY; TESTICLES; VASECTOMY.</code> | <code>1 Testicular cancer symptoms include a painless lump or swelling in a testicle, testicle or scrotum pain, a dull ache in the abdomen, back, or groin, and. 2 Urinary Tract Infections (UTIs) A urinary tract infection (UTI) is an infection of the bladder, kidneys, ureters, or urethra.</code> |
+  | <code>how old is kieron williamson</code> | <code>Kieron Williamson – the latest artist to be part of GoGoDragons! April 21, 2015. A 12-year-old artist, nicknamed Mini-Monet, is to unveil a sculpture of a dragon he has painted for GoGoDragons. Kieron Williamson, from Norfolk, who has so far earned about £2m, painted the 5ft-tall (1.5m) dragon for the event in Norwich.</code> | <code>8-year-old artist: Don't call me Monet. London, England (CNN) -- He has the deft brush strokes of a seasoned artist, but Kieron Williamson is just eight years old. The boy from Norfolk, in eastern England, has been hailed by the British press as a mini Monet, a reference to the famous French impressionist.</code> | <code>Needless to say, this site does not tell you much about his football career (yet!), but the website will tell you everything there is to know about Kieron Williamson’s passion for oil, watercolour and pastel,</code> |
+  | <code>when do you start showing third pregnancy</code> | <code>Yes | No Thank you! I am pregnant with my third child and I am definitly showing at 10 weeks. I am starting to wear some maternity clothes. My low low rise pre-pregnancy jeans still work. My biggest problem is shirts, but fortunately the style right now is loose shirts that look maternity.</code> | <code>Some women do not start to show until they are well into their second trimester or even the start of their third trimester. If you are overweight at the start of your pregnancy, you may not gain as much weight during your pregnancy and may not begin to show until later into your pregnancy. Average: 3.591215.</code> | <code>There isn't a set time when moms-to-be start sporting an obviously pregnant belly – every woman is different. Some women keep their pre-pregnancy belly far into the second trimester, while others start showing in the first trimester.</code> |
 * Loss: [<code>SpladeLoss</code>](https://sbert.net/docs/package_reference/sparse_encoder/losses.html#spladeloss) with these parameters:
   ```json
   {
       "loss": "SparseMultipleNegativesRankingLoss(scale=1.0, similarity_fct='dot_score')",
-      "document_regularizer_weight": 0.
+      "document_regularizer_weight": 0.003,
-      "query_regularizer_weight": 0.
+      "query_regularizer_weight": 0.005
   }
   ```
````
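A minimal sketch of how this loss configuration maps to code (the module path matches the SpladeLoss docs linked in the card; the base model mirrors the card's `base_model` field, and the weights mirror the JSON above):

```python
# Sketch only, not the author's exact training script. SpladeLoss wraps a
# ranking loss and adds FLOPS regularization on query and document vectors.
from sentence_transformers import SparseEncoder
from sentence_transformers.sparse_encoder.losses import (
    SparseMultipleNegativesRankingLoss,
    SpladeLoss,
)

# Loading a plain MLM checkpoint as a SparseEncoder yields a SPLADE-style
# architecture (MLM head + max pooling over token logits).
model = SparseEncoder("prajjwal1/bert-tiny")

loss = SpladeLoss(
    model=model,
    loss=SparseMultipleNegativesRankingLoss(model=model, scale=1.0),
    document_regularizer_weight=0.003,  # FLOPS penalty on document vectors
    query_regularizer_weight=0.005,     # FLOPS penalty on query vectors
)
```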
````diff
@@ -286,14 +288,17 @@ You can finetune this model on your own dataset.

 #### Non-Default Hyperparameters

 - `eval_strategy`: epoch
-- `per_device_train_batch_size`:
+- `per_device_train_batch_size`: 16
-- `per_device_eval_batch_size`:
+- `per_device_eval_batch_size`: 16
-- `
-- `
+- `gradient_accumulation_steps`: 4
+- `learning_rate`: 6e-05
+- `num_train_epochs`: 6
 - `lr_scheduler_type`: cosine
 - `warmup_ratio`: 0.025
 - `fp16`: True
+- `load_best_model_at_end`: True
 - `optim`: adamw_torch_fused
+- `push_to_hub`: True
 - `batch_sampler`: no_duplicates

 #### All Hyperparameters
````
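For illustration, the non-default values above correspond to a trainer configuration along these lines (a sketch assuming the `SparseEncoderTrainingArguments` class added in v5; the output directory is hypothetical, and `save_strategy="epoch"` is an assumption needed so `load_best_model_at_end=True` can pair checkpoints with the epoch-level evals):

```python
# Sketch: the card's non-default hyperparameters expressed as v5 training
# arguments. "splade-bert-tiny" is a hypothetical output directory.
from sentence_transformers import SparseEncoderTrainingArguments
from sentence_transformers.training_args import BatchSamplers

args = SparseEncoderTrainingArguments(
    output_dir="splade-bert-tiny",  # hypothetical
    eval_strategy="epoch",
    save_strategy="epoch",          # assumption, see note above
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    gradient_accumulation_steps=4,
    learning_rate=6e-5,
    num_train_epochs=6,
    lr_scheduler_type="cosine",
    warmup_ratio=0.025,
    fp16=True,
    load_best_model_at_end=True,
    optim="adamw_torch_fused",
    push_to_hub=True,
    batch_sampler=BatchSamplers.NO_DUPLICATES,
)
```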
````diff
@@ -303,20 +308,20 @@ You can finetune this model on your own dataset.

 - `do_predict`: False
 - `eval_strategy`: epoch
 - `prediction_loss_only`: True
-- `per_device_train_batch_size`:
+- `per_device_train_batch_size`: 16
-- `per_device_eval_batch_size`:
+- `per_device_eval_batch_size`: 16
 - `per_gpu_train_batch_size`: None
 - `per_gpu_eval_batch_size`: None
-- `gradient_accumulation_steps`:
+- `gradient_accumulation_steps`: 4
 - `eval_accumulation_steps`: None
 - `torch_empty_cache_steps`: None
-- `learning_rate`:
+- `learning_rate`: 6e-05
 - `weight_decay`: 0.0
 - `adam_beta1`: 0.9
 - `adam_beta2`: 0.999
 - `adam_epsilon`: 1e-08
 - `max_grad_norm`: 1.0
-- `num_train_epochs`:
+- `num_train_epochs`: 6
 - `max_steps`: -1
 - `lr_scheduler_type`: cosine
 - `lr_scheduler_kwargs`: {}
@@ -356,7 +361,7 @@ You can finetune this model on your own dataset.

 - `disable_tqdm`: False
 - `remove_unused_columns`: True
 - `label_names`: None
-- `load_best_model_at_end`:
+- `load_best_model_at_end`: True
 - `ignore_data_skip`: False
 - `fsdp`: []
 - `fsdp_min_num_params`: 0
@@ -377,7 +382,7 @@ You can finetune this model on your own dataset.

 - `dataloader_persistent_workers`: False
 - `skip_memory_metrics`: True
 - `use_legacy_prediction_loop`: False
-- `push_to_hub`:
+- `push_to_hub`: True
 - `resume_from_checkpoint`: None
 - `hub_model_id`: None
 - `hub_strategy`: every_save
@@ -420,23 +425,25 @@ You can finetune this model on your own dataset.

 </details>

 ### Training Logs
-| Epoch
-| 1.0
-| 2.0
-| 3.0
-| 4.0
+| Epoch   | Step      | Training Loss | dot_ndcg@10 |
+|:-------:|:---------:|:-------------:|:-----------:|
+| 1.0     | 12500     | 11.5771       | 0.6587      |
+| 2.0     | 25000     | 0.7888        | 0.6810      |
+| 3.0     | 37500     | 0.7271        | 0.6884      |
+| 4.0     | 50000     | 0.6774        | 0.6920      |
+| 5.0     | 62500     | 0.6436        | 0.6912      |
+| **6.0** | **75000** | **0.6274**    | **0.6932**  |

+* The bold row denotes the saved checkpoint.

 ### Framework Versions
-- Python: 3.11.
+- Python: 3.11.11
 - Sentence Transformers: 5.0.0
-- Transformers: 4.53.
+- Transformers: 4.53.1
 - PyTorch: 2.6.0+cu124
-- Accelerate: 1.
+- Accelerate: 1.5.2
 - Datasets: 3.6.0
-- Tokenizers: 0.21.
+- Tokenizers: 0.21.1

 ## Citation
````
config.json
CHANGED

````diff
@@ -17,7 +17,7 @@
   "pad_token_id": 0,
   "position_embedding_type": "absolute",
   "torch_dtype": "float32",
-  "transformers_version": "4.53.
+  "transformers_version": "4.53.1",
   "type_vocab_size": 2,
   "use_cache": true,
   "vocab_size": 30522
````
config_sentence_transformers.json
CHANGED

````diff
@@ -2,7 +2,7 @@
   "model_type": "SparseEncoder",
   "__version__": {
     "sentence_transformers": "5.0.0",
-    "transformers": "4.53.
+    "transformers": "4.53.1",
     "pytorch": "2.6.0+cu124"
   },
   "prompts": {
````
model.safetensors
CHANGED

````diff
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:6b234281fbc722918304ccf3e74f1b216cc2deb2c3197034a6bf90f2a6b7e3fe
 size 17671560
````
special_tokens_map.json
CHANGED

````diff
@@ -1,7 +1,37 @@
 {
-  "cls_token":
+  "cls_token": {
+    "content": "[CLS]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "[MASK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "[SEP]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
 }
````
tokenizer_config.json
CHANGED

````diff
@@ -47,12 +47,19 @@
   "do_lower_case": true,
   "extra_special_tokens": {},
   "mask_token": "[MASK]",
-  "
+  "max_length": 512,
+  "model_max_length": 512,
   "never_split": null,
+  "pad_to_multiple_of": null,
   "pad_token": "[PAD]",
+  "pad_token_type_id": 0,
+  "padding_side": "right",
   "sep_token": "[SEP]",
+  "stride": 0,
   "strip_accents": null,
   "tokenize_chinese_chars": true,
   "tokenizer_class": "BertTokenizer",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first",
   "unk_token": "[UNK]"
 }
````
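Finally, a quick illustrative sanity check (not part of the commit) that the new tokenizer settings round-trip after upload:

```python
# Verifies the committed tokenizer_config.json / special_tokens_map.json by
# loading the repo with plain transformers.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("yosefw/SPLADE-BERT-Tiny-v2")
print(tok.model_max_length)                   # 512
print(tok.padding_side, tok.truncation_side)  # right right
print(tok.cls_token, tok.sep_token, tok.pad_token, tok.mask_token, tok.unk_token)
```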