Tom Aarsen committed on Commit · 395ad23
1 Parent(s): ad4c588
Convert the model to a SequenceClassification variant

Browse files:
- README.md +130 -7
- config.json +10 -3
- model.safetensors +2 -2

README.md
CHANGED
@@ -2,7 +2,10 @@
license: apache-2.0
base_model:
- Qwen/Qwen3-0.6B-Base
+tags:
+- transformers
+- sentence-transformers
+pipeline_tag: text-ranking
---
# Qwen3-Reranker-0.6B

@@ -10,6 +13,11 @@ library_name: transformers
<img src="https://qianwen-res.oss-accelerate-overseas.aliyuncs.com/logo_qwen3.png" width="400"/>
<p>

+> [!NOTE]
+> This is a copy of the [Qwen3-Reranker-0.6B](https://huggingface.co/Qwen/Qwen3-Reranker-0.6B) model, part of the [Qwen3 Reranker series](https://huggingface.co/collections/Qwen/qwen3-reranker-6841b22d0192d7ade9cdefea), converted into a sequence classification model. See [Updated Usage](#updated-usage) for how to use this variant, or [Original Usage](#original-usage) for the original usage.
+>
+> See [this discussion](https://huggingface.co/Qwen/Qwen3-Reranker-0.6B/discussions/3) for details on the conversion approach.
+
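
The note above describes converting the original causal-LM reranker into a sequence classification variant. Below is a minimal sketch of one way such a conversion can be done (an assumption based on the linked discussion, not the exact script behind this commit): softmax over the "no"/"yes" token logits equals a sigmoid over their difference, so a single-label classification head whose weight is the difference of those two LM-head rows reproduces the original scores. The output directory name is illustrative.

```python
# Requires a transformers version that provides Qwen3ForSequenceClassification
# (the updated config.json in this commit lists 4.52.4).
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoModelForSequenceClassification,
    AutoTokenizer,
)

source = "Qwen/Qwen3-Reranker-0.6B"
tokenizer = AutoTokenizer.from_pretrained(source)
causal_lm = AutoModelForCausalLM.from_pretrained(source)
# Loading the causal checkpoint as a single-label classifier leaves the
# classification head ("score") randomly initialized; it is overwritten below.
classifier = AutoModelForSequenceClassification.from_pretrained(source, num_labels=1)

yes_id = tokenizer.convert_tokens_to_ids("yes")
no_id = tokenizer.convert_tokens_to_ids("no")

with torch.no_grad():
    # softmax([logit_no, logit_yes])["yes"] == sigmoid(logit_yes - logit_no), and
    # logit_yes - logit_no == hidden_state @ (W_yes - W_no), so the (1, hidden_size)
    # classification weight is the difference of the two LM-head rows.
    weight = causal_lm.lm_head.weight[yes_id] - causal_lm.lm_head.weight[no_id]
    classifier.score.weight.copy_(weight.unsqueeze(0))

# A pad token is required for batched scoring with a sequence classification head.
classifier.config.pad_token_id = tokenizer.pad_token_id
classifier.save_pretrained("Qwen3-Reranker-0.6B-seq-cls")
tokenizer.save_pretrained("Qwen3-Reranker-0.6B-seq-cls")
```
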
## Highlights

The Qwen3 Embedding model series is the latest proprietary model of the Qwen family, specifically designed for text embedding and ranking tasks. Building upon the dense foundational models of the Qwen3 series, it provides a comprehensive range of text embeddings and reranking models in various sizes (0.6B, 4B, and 8B). This series inherits the exceptional multilingual capabilities, long-text understanding, and reasoning skills of its foundational model. The Qwen3 Embedding series represents significant advancements in multiple text embedding and ranking tasks, including text retrieval, code retrieval, text classification, text clustering, and bitext mining.

@@ -55,7 +63,116 @@ With Transformers versions earlier than 4.51.0, you may encounter the following
KeyError: 'qwen3'
```

+### Updated Usage
+
+#### Updated Sentence Transformers Usage
+
+```python
+# Requires transformers>=4.51.0
+from sentence_transformers import CrossEncoder
+
+
+def format_queries(query, instruction=None):
+    prefix = '<|im_start|>system\nJudge whether the Document meets the requirements based on the Query and the Instruct provided. Note that the answer can only be "yes" or "no".<|im_end|>\n<|im_start|>user\n'
+    if instruction is None:
+        instruction = (
+            "Given a web search query, retrieve relevant passages that answer the query"
+        )
+    return f"{prefix}<Instruct>: {instruction}\n<Query>: {query}\n"
+
+
+def format_document(document):
+    suffix = "<|im_end|>\n<|im_start|>assistant\n<think>\n\n</think>\n\n"
+    return f"<Document>: {document}{suffix}"
+
+
+model = CrossEncoder("tomaarsen/Qwen3-Reranker-0.6B")
+
+task = "Given a web search query, retrieve relevant passages that answer the query"
+
+queries = [
+    "Which planet is known as the Red Planet?",
+    "Which planet is known as the Red Planet?",
+    "Which planet is known as the Red Planet?",
+    "Which planet is known as the Red Planet?",
+]
+
+documents = [
+    "Venus is often called Earth's twin because of its similar size and proximity.",
+    "Mars, known for its reddish appearance, is often referred to as the Red Planet.",
+    "Jupiter, the largest planet in our solar system, has a prominent red spot.",
+    "Saturn, famous for its rings, is sometimes mistaken for the Red Planet.",
+]
+
+pairs = [
+    [format_queries(query, task), format_document(doc)]
+    for query, doc in zip(queries, documents)
+]
+scores = model.predict(pairs)
+print(scores.tolist())
+# [0.04272603616118431, 0.9991921782493591, 0.40642625093460083, 0.9718492031097412]
+```
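
As a small follow-up to the example above (reusing the names defined there), the predicted scores can be used directly to rank candidate documents for a single query:

```python
query = "Which planet is known as the Red Planet?"
pairs = [[format_queries(query, task), format_document(doc)] for doc in documents]
scores = model.predict(pairs)

# Sort the candidate documents by predicted relevance, highest first.
for doc, score in sorted(zip(documents, scores.tolist()), key=lambda item: item[1], reverse=True):
    print(f"{score:.4f}  {doc}")
```
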
+
+#### Updated Transformers Usage
+
+```python
+# Requires transformers>=4.51.0
+from transformers import AutoModelForSequenceClassification, AutoTokenizer
+
+
+def format_instruction(instruction, query, doc):
+    prefix = '<|im_start|>system\nJudge whether the Document meets the requirements based on the Query and the Instruct provided. Note that the answer can only be "yes" or "no".<|im_end|>\n<|im_start|>user\n'
+    suffix = "<|im_end|>\n<|im_start|>assistant\n<think>\n\n</think>\n\n"
+    if instruction is None:
+        instruction = (
+            "Given a web search query, retrieve relevant passages that answer the query"
+        )
+    output = f"{prefix}<Instruct>: {instruction}\n<Query>: {query}\n<Document>: {doc}{suffix}"
+    return output
+
+
+tokenizer = AutoTokenizer.from_pretrained("tomaarsen/Qwen3-Reranker-0.6B", padding_side="left")
+model = AutoModelForSequenceClassification.from_pretrained("tomaarsen/Qwen3-Reranker-0.6B").eval()
+# We recommend enabling flash_attention_2 for better acceleration and memory saving.
+# model = AutoModelForSequenceClassification.from_pretrained("tomaarsen/Qwen3-Reranker-0.6B", torch_dtype=torch.float16, attn_implementation="flash_attention_2").cuda().eval()
+max_length = 8192
+
+task = "Given a web search query, retrieve relevant passages that answer the query"
+
+queries = [
+    "Which planet is known as the Red Planet?",
+    "Which planet is known as the Red Planet?",
+    "Which planet is known as the Red Planet?",
+    "Which planet is known as the Red Planet?",
+]
+
+documents = [
+    "Venus is often called Earth's twin because of its similar size and proximity.",
+    "Mars, known for its reddish appearance, is often referred to as the Red Planet.",
+    "Jupiter, the largest planet in our solar system, has a prominent red spot.",
+    "Saturn, famous for its rings, is sometimes mistaken for the Red Planet.",
+]
+
+pairs = [format_instruction(task, query, doc) for query, doc in zip(queries, documents)]
+inputs = tokenizer(
+    pairs,
+    padding=True,
+    truncation=True,
+    max_length=max_length,
+    return_tensors="pt",
+)
+logits = model(**inputs).logits.squeeze()
+print(logits.tolist())
+# [-3.109282970428467, 7.120373725891113, -0.37874650955200195, 3.5416228771209717]
+
+scores = logits.sigmoid()
+print(scores.tolist())
+# [0.04272596165537834, 0.9991921782493591, 0.406429260969162, 0.9718491435050964]
+```
+
+### Original Usage
+
+#### Original Transformers Usage

```python
# Requires transformers>=4.51.0

@@ -105,13 +222,18 @@ suffix_tokens = tokenizer.encode(suffix, add_special_tokens=False)

task = 'Given a web search query, retrieve relevant passages that answer the query'

+queries = [
+    "Which planet is known as the Red Planet?",
+    "Which planet is known as the Red Planet?",
+    "Which planet is known as the Red Planet?",
+    "Which planet is known as the Red Planet?",
]

documents = [
+    "Venus is often called Earth's twin because of its similar size and proximity.",
+    "Mars, known for its reddish appearance, is often referred to as the Red Planet.",
+    "Jupiter, the largest planet in our solar system, has a prominent red spot.",
+    "Saturn, famous for its rings, is sometimes mistaken for the Red Planet.",
]

pairs = [format_instruction(task, query, doc) for query, doc in zip(queries, documents)]

@@ -121,10 +243,11 @@ inputs = process_inputs(pairs)
scores = compute_logits(inputs)

print("scores: ", scores)
+# scores: [0.04272589832544327, 0.9991921782493591, 0.40642935037612915, 0.9718492031097412]
```
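
The snippet above calls `process_inputs` and `compute_logits`, which are defined earlier in the original model card and fall outside this diff. For reference, they look roughly like the following (an approximation that assumes the `tokenizer`, `model`, `prefix_tokens`, `suffix_tokens`, and `max_length` set up earlier in the original card; see the Qwen/Qwen3-Reranker-0.6B card for the exact code):

```python
import torch

token_false_id = tokenizer.convert_tokens_to_ids("no")
token_true_id = tokenizer.convert_tokens_to_ids("yes")

def process_inputs(pairs):
    # Tokenize the formatted pairs, wrap each sequence in the chat prefix/suffix
    # tokens, and pad everything into one batch on the model's device.
    inputs = tokenizer(
        pairs,
        padding=False,
        truncation="longest_first",
        return_attention_mask=False,
        max_length=max_length - len(prefix_tokens) - len(suffix_tokens),
    )
    for i, ids in enumerate(inputs["input_ids"]):
        inputs["input_ids"][i] = prefix_tokens + ids + suffix_tokens
    inputs = tokenizer.pad(inputs, padding=True, return_tensors="pt", max_length=max_length)
    return {key: value.to(model.device) for key, value in inputs.items()}

@torch.no_grad()
def compute_logits(inputs):
    # Take the next-token logits at the last position and convert the "yes"/"no"
    # logits into a probability of "yes" with a two-way softmax.
    batch_scores = model(**inputs).logits[:, -1, :]
    true_vector = batch_scores[:, token_true_id]
    false_vector = batch_scores[:, token_false_id]
    stacked = torch.stack([false_vector, true_vector], dim=1)
    log_probs = torch.nn.functional.log_softmax(stacked, dim=1)
    return log_probs[:, 1].exp().tolist()
```
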


+#### Original vLLM Usage

```python
# Requires vllm>=0.8.5

config.json
CHANGED
@@ -1,6 +1,6 @@
{
  "architectures": [
+    "Qwen3ForSequenceClassification"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
@@ -9,21 +9,28 @@
  "head_dim": 128,
  "hidden_act": "silu",
  "hidden_size": 1024,
+  "id2label": {
+    "0": "LABEL_0"
+  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
+  "label2id": {
+    "LABEL_0": 0
+  },
  "max_position_embeddings": 40960,
  "max_window_layers": 28,
  "model_type": "qwen3",
  "num_attention_heads": 16,
  "num_hidden_layers": 28,
  "num_key_value_heads": 8,
+  "pad_token_id": 151643,
  "rms_norm_eps": 1e-06,
  "rope_scaling": null,
  "rope_theta": 1000000,
  "sliding_window": null,
  "tie_word_embeddings": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.52.4",
  "use_cache": true,
  "use_sliding_window": false,
  "vocab_size": 151669
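
As a quick sanity check of the updated configuration (a sketch; the printed values follow from the config above), the repository should now load as a single-label sequence classification model with a pad token set, which is what enables batched scoring via `AutoModelForSequenceClassification`:

```python
from transformers import AutoConfig

config = AutoConfig.from_pretrained("tomaarsen/Qwen3-Reranker-0.6B")
print(config.architectures)  # ['Qwen3ForSequenceClassification']
print(config.num_labels)     # 1, i.e. a single relevance logit
print(config.pad_token_id)   # 151643
```
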
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
+oid sha256:2b223e576dc70d8832372a538d4c8458dc25ce9daf209ac47cec4799c85e4da7
+size 2383145520