Add sentence_bert_config.json
#7
by
yjoonjang
- opened
README.md
CHANGED
@@ -62,7 +62,6 @@ KeyError: 'qwen3'
|
|
62 |
|
63 |
```python
|
64 |
# Requires transformers>=4.51.0
|
65 |
-
# Requires sentence-transformers>=2.7.0
|
66 |
|
67 |
from sentence_transformers import SentenceTransformer
|
68 |
|
@@ -166,41 +165,6 @@ scores = (embeddings[:2] @ embeddings[2:].T)
|
|
166 |
print(scores.tolist())
|
167 |
# [[0.7645568251609802, 0.14142508804798126], [0.13549736142158508, 0.5999549627304077]]
|
168 |
```
|
169 |
-
|
170 |
-
### vLLM Usage
|
171 |
-
|
172 |
-
```python
|
173 |
-
# Requires vllm>=0.8.5
|
174 |
-
import torch
|
175 |
-
import vllm
|
176 |
-
from vllm import LLM
|
177 |
-
|
178 |
-
def get_detailed_instruct(task_description: str, query: str) -> str:
|
179 |
-
return f'Instruct: {task_description}\nQuery:{query}'
|
180 |
-
|
181 |
-
# Each query must come with a one-sentence instruction that describes the task
|
182 |
-
task = 'Given a web search query, retrieve relevant passages that answer the query'
|
183 |
-
|
184 |
-
queries = [
|
185 |
-
get_detailed_instruct(task, 'What is the capital of China?'),
|
186 |
-
get_detailed_instruct(task, 'Explain gravity')
|
187 |
-
]
|
188 |
-
# No need to add instruction for retrieval documents
|
189 |
-
documents = [
|
190 |
-
"The capital of China is Beijing.",
|
191 |
-
"Gravity is a force that attracts two bodies towards each other. It gives weight to physical objects and is responsible for the movement of planets around the sun."
|
192 |
-
]
|
193 |
-
input_texts = queries + documents
|
194 |
-
|
195 |
-
model = LLM(model="Qwen/Qwen3-Embedding-0.6B", task="embed")
|
196 |
-
|
197 |
-
outputs = model.embed(input_texts)
|
198 |
-
embeddings = torch.tensor([o.outputs.embedding for o in outputs])
|
199 |
-
scores = (embeddings[:2] @ embeddings[2:].T)
|
200 |
-
print(scores.tolist())
|
201 |
-
# [[0.7620252966880798, 0.14078938961029053], [0.1358368694782257, 0.6013815999031067]]
|
202 |
-
```
|
203 |
-
|
204 |
📌 **Tip**: We recommend that developers customize the `instruct` according to their specific scenarios, tasks, and languages. Our tests have shown that in most retrieval scenarios, not using an `instruct` on the query side can lead to a drop in retrieval performance by approximately 1% to 5%.
|
205 |
|
206 |
## Evaluation
|
@@ -258,10 +222,11 @@ print(scores.tolist())
|
|
258 |
If you find our work helpful, feel free to cite us.
|
259 |
|
260 |
```
|
261 |
-
@
|
262 |
-
|
263 |
-
|
264 |
-
|
265 |
-
|
|
|
266 |
}
|
267 |
```
|
|
|
62 |
|
63 |
```python
|
64 |
# Requires transformers>=4.51.0
|
|
|
65 |
|
66 |
from sentence_transformers import SentenceTransformer
|
67 |
|
|
|
165 |
print(scores.tolist())
|
166 |
# [[0.7645568251609802, 0.14142508804798126], [0.13549736142158508, 0.5999549627304077]]
|
167 |
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
168 |
📌 **Tip**: We recommend that developers customize the `instruct` according to their specific scenarios, tasks, and languages. Our tests have shown that in most retrieval scenarios, not using an `instruct` on the query side can lead to a drop in retrieval performance by approximately 1% to 5%.
|
169 |
|
170 |
## Evaluation
|
|
|
222 |
If you find our work helpful, feel free to cite us.
|
223 |
|
224 |
```
|
225 |
+
@misc{qwen3-embedding,
|
226 |
+
title = {Qwen3-Embedding},
|
227 |
+
url = {https://qwenlm.github.io/blog/qwen3/},
|
228 |
+
author = {Qwen Team},
|
229 |
+
month = {May},
|
230 |
+
year = {2025}
|
231 |
}
|
232 |
```
|