Automatically add EOS via Tokenizer, integrate Sentence Transformers (#2)
- Automatically add EOS via Tokenizer, integrate Sentence Transformers (4ee1aa535260ccb3eb883af36bac4314e0c3bbf7)
- Add "device_map": "auto" to automatically move the model to CUDA if possible (2f6ecfd654caa7ef022fa415b53ede194322d068)
- Remove eod_id line from README (2653833944d8c3c22a0139b6174051f7ce72879f)
- 1_Pooling/config.json +10 -0
- README.md +54 -11
- config_sentence_transformers.json +8 -0
- modules.json +20 -0
- tokenizer.json +2 -2
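
Per the first commit message, the updated `tokenizer.json` now appends the EOS token itself, which is why the manual `tokenize` helper and the `eod_id` lookup are dropped from the README below. A minimal sketch to sanity-check that behavior (not part of the commit; it assumes `transformers>=4.51.0` and access to the Hub):

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-Embedding-0.6B")

ids = tokenizer("What is the capital of China?")["input_ids"]
eos_id = tokenizer.convert_tokens_to_ids("<|endoftext|>")

# The tokenizer's post-processor should now append <|endoftext|> on its own,
# replacing the old manual `seq.append(eod_id)` step.
assert ids[-1] == eos_id
```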
1_Pooling/config.json
ADDED
@@ -0,0 +1,10 @@
+{
+    "word_embedding_dimension": 1024,
+    "pooling_mode_cls_token": false,
+    "pooling_mode_mean_tokens": false,
+    "pooling_mode_max_tokens": false,
+    "pooling_mode_mean_sqrt_len_tokens": false,
+    "pooling_mode_weightedmean_tokens": false,
+    "pooling_mode_lasttoken": true,
+    "include_prompt": true
+}
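
This config selects last-token pooling: the sentence embedding is the hidden state of the final non-padding token. For reference, this matches the `last_token_pool` helper that the README diff below keeps using:

```python
import torch
from torch import Tensor

def last_token_pool(last_hidden_states: Tensor,
                    attention_mask: Tensor) -> Tensor:
    # With left padding, the last position is always a real token.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # With right padding, index each sequence at its last attended position.
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
```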
README.md
CHANGED
@@ -2,7 +2,11 @@
 license: apache-2.0
 base_model:
 - Qwen/Qwen3-0.6B-Base
-
+tags:
+- transformers
+- sentence-transformers
+- sentence-similarity
+- feature-extraction
 ---
 # Qwen3-Embedding-0.6B
 
@@ -54,6 +58,47 @@ With Transformers versions earlier than 4.51.0, you may encounter the following
 KeyError: 'qwen3'
 ```
 
+### Sentence Transformers Usage
+
+```python
+# Requires transformers>=4.51.0
+
+from sentence_transformers import SentenceTransformer
+
+# Load the model
+model = SentenceTransformer("Qwen/Qwen3-Embedding-0.6B")
+
+# We recommend enabling flash_attention_2 for better acceleration and memory saving,
+# together with setting `padding_side` to "left":
+# model = SentenceTransformer(
+#     "Qwen/Qwen3-Embedding-0.6B",
+#     model_kwargs={"attn_implementation": "flash_attention_2", "device_map": "auto"},
+#     tokenizer_kwargs={"padding_side": "left"},
+# )
+
+# The queries and documents to embed
+queries = [
+    "What is the capital of China?",
+    "Explain gravity",
+]
+documents = [
+    "The capital of China is Beijing.",
+    "Gravity is a force that attracts two bodies towards each other. It gives weight to physical objects and is responsible for the movement of planets around the sun.",
+]
+
+# Encode the queries and documents. Note that queries benefit from using a prompt.
+# Here we use the prompt called "query" stored under `model.prompts`, but you can
+# also pass your own prompt via the `prompt` argument.
+query_embeddings = model.encode(queries, prompt_name="query")
+document_embeddings = model.encode(documents)
+
+# Compute the (cosine) similarity between the query and document embeddings
+similarity = model.similarity(query_embeddings, document_embeddings)
+print(similarity)
+# tensor([[0.7646, 0.1414],
+#         [0.1355, 0.6000]])
+```
+
 ### Transformers Usage
 
 ```python
@@ -80,14 +125,6 @@ def last_token_pool(last_hidden_states: Tensor,
 def get_detailed_instruct(task_description: str, query: str) -> str:
     return f'Instruct: {task_description}\nQuery:{query}'
 
-def tokenize(tokenizer, input_texts, eod_id, max_length):
-    batch_dict = tokenizer(input_texts, padding=False, truncation=True, max_length=max_length-2)
-    for seq, att in zip(batch_dict["input_ids"], batch_dict["attention_mask"]):
-        seq.append(eod_id)
-        att.append(1)
-    batch_dict = tokenizer.pad(batch_dict, padding=True, return_tensors="pt")
-    return batch_dict
-
 # Each query must come with a one-sentence instruction that describes the task
 task = 'Given a web search query, retrieve relevant passages that answer the query'
 
@@ -108,11 +145,16 @@ model = AutoModel.from_pretrained('Qwen/Qwen3-Embedding-0.6B')
 # We recommend enabling flash_attention_2 for better acceleration and memory saving.
 # model = AutoModel.from_pretrained('Qwen/Qwen3-Embedding-0.6B', attn_implementation="flash_attention_2", torch_dtype=torch.float16).cuda()
 
-eod_id = tokenizer.convert_tokens_to_ids("<|endoftext|>")
 max_length = 8192
 
 # Tokenize the input texts
-batch_dict = tokenize(tokenizer, input_texts, eod_id, max_length)
+batch_dict = tokenizer(
+    input_texts,
+    padding=True,
+    truncation=True,
+    max_length=max_length,
+    return_tensors="pt",
+)
 batch_dict.to(model.device)
 outputs = model(**batch_dict)
 embeddings = last_token_pool(outputs.last_hidden_state, batch_dict['attention_mask'])
@@ -121,6 +163,7 @@ embeddings = last_token_pool(outputs.last_hidden_state, batch_dict['attention_ma
 embeddings = F.normalize(embeddings, p=2, dim=1)
 scores = (embeddings[:2] @ embeddings[2:].T)
 print(scores.tolist())
+# [[0.7645568251609802, 0.14142508804798126], [0.13549736142158508, 0.5999549627304077]]
 ```
 📌 **Tip**: We recommend that developers customize the `instruct` according to their specific scenarios, tasks, and languages. Our tests have shown that in most retrieval scenarios, not using an `instruct` on the query side can lead to a drop in retrieval performance by approximately 1% to 5%.
 
config_sentence_transformers.json
ADDED
@@ -0,0 +1,8 @@
+{
+    "prompts": {
+        "query": "Instruct: Given a web search query, retrieve relevant passages that answer the query\nQuery:",
+        "document": ""
+    },
+    "default_prompt_name": null,
+    "similarity_fn_name": "cosine"
+}
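
These prompts are read by Sentence Transformers at load time and exposed as `model.prompts`. A small sketch of how they come into play (not part of the commit; `prompt_name` and `prompt` are standard `encode` arguments):

```python
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Qwen/Qwen3-Embedding-0.6B")
print(model.prompts)  # {'query': 'Instruct: ...\nQuery:', 'document': ''}

# Select the stored "query" prompt by name, or pass a custom prompt string:
q_stored = model.encode(["What is the capital of China?"], prompt_name="query")
q_custom = model.encode(
    ["What is the capital of China?"],
    prompt="Instruct: Given a web search query, retrieve relevant passages that answer the query\nQuery:",
)
```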
modules.json
ADDED
@@ -0,0 +1,20 @@
+[
+    {
+        "idx": 0,
+        "name": "0",
+        "path": "",
+        "type": "sentence_transformers.models.Transformer"
+    },
+    {
+        "idx": 1,
+        "name": "1",
+        "path": "1_Pooling",
+        "type": "sentence_transformers.models.Pooling"
+    },
+    {
+        "idx": 2,
+        "name": "2",
+        "path": "2_Normalize",
+        "type": "sentence_transformers.models.Normalize"
+    }
+]
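
This file declares the three-module pipeline that Sentence Transformers assembles when loading the repo: Transformer → Pooling (last token) → Normalize. As a sketch (not part of the commit), the equivalent pipeline built by hand:

```python
from sentence_transformers import SentenceTransformer, models

# idx 0, path "": the Transformer backbone at the repo root
transformer = models.Transformer("Qwen/Qwen3-Embedding-0.6B")
# idx 1, path "1_Pooling": last-token pooling over the 1024-dim hidden states
pooling = models.Pooling(
    transformer.get_word_embedding_dimension(),
    pooling_mode="lasttoken",
)
# idx 2, path "2_Normalize": L2-normalize the pooled embeddings
normalize = models.Normalize()

model = SentenceTransformer(modules=[transformer, pooling, normalize])
```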
tokenizer.json
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:def76fb086971c7867b829c23a26261e38d9d74e02139253b38aeb9df8b4b50a
+size 11423705