JustinLin610 committed
Commit 21c44fb · verified · 1 Parent(s): 92422d7

Update README.md

Files changed (1)
  1. README.md +57 -26
README.md CHANGED
@@ -5,7 +5,7 @@ license_link: https://huggingface.co/Qwen/Qwen3-14B/blob/main/LICENSE
 pipeline_tag: text-generation
 ---
 
-# Qwen3-235B-A22B
+# Qwen3-235B-A22B-MLX-4bit
 <a href="https://chat.qwen.ai/" target="_blank" style="margin: 2px;">
     <img alt="Chat" src="https://img.shields.io/badge/%F0%9F%92%9C%EF%B8%8F%20Qwen%20Chat%20-536af5" style="display: inline-block; vertical-align: middle;"/>
 </a>
@@ -48,16 +48,29 @@ KeyError: 'qwen3'
 ```
 
 The following code snippet illustrates how to use the model to generate content based on given inputs.
+
 ```python
 from mlx_lm import load, generate
+
 model, tokenizer = load("Qwen/Qwen3-235B-A22B-MLX-4bit")
-prompt = "hello, Introduce yourself, and what can you do ?"
+prompt = "Hello, please introduce yourself and tell me what you can do."
+
 if tokenizer.chat_template is not None:
     messages = [{"role": "user", "content": prompt}]
     prompt = tokenizer.apply_chat_template(
-        messages, add_generation_prompt=True
+        messages,
+        add_generation_prompt=True
     )
-response = generate(model, tokenizer, prompt=prompt, verbose=True, max_tokens=1024)
+
+response = generate(
+    model,
+    tokenizer,
+    prompt=prompt,
+    verbose=True,
+    max_tokens=1024
+)
+
+print(response)
 ```
 
 ## Switching Between Thinking and Non-Thinking Mode
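A companion sketch for the snippet above (not part of the diff): it relies only on the calls the README already uses (`load`, `generate`, `apply_chat_template`), plus the standard `tokenize=False` keyword, so the templated prompt stays a plain string that can be inspected or logged before generation.

```python
from mlx_lm import load, generate

# Same model as the README snippet; tokenize=False keeps the templated
# prompt as a string rather than token ids.
model, tokenizer = load("Qwen/Qwen3-235B-A22B-MLX-4bit")

messages = [{"role": "user", "content": "Give me a short introduction to large language models."}]
prompt = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
)

response = generate(model, tokenizer, prompt=prompt, verbose=True, max_tokens=1024)
```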
@@ -111,6 +124,8 @@ Here is an example of a multi-turn conversation:
 
 ```python
 from mlx_lm import load, generate
+
+
 class QwenChatbot:
     def __init__(self, model_name="Qwen/Qwen3-235B-A22B-MLX-4bit"):
         self.model, self.tokenizer = load(model_name)
@@ -125,29 +140,36 @@ class QwenChatbot:
             add_generation_prompt=True
         )
 
-        response = generate(self.model, self.tokenizer, prompt=text, verbose=True, max_tokens=32768)
+        response = generate(
+            self.model,
+            self.tokenizer,
+            prompt=text,
+            verbose=True,
+            max_tokens=32768
+        )
         # Update history
         self.history.append({"role": "user", "content": user_input})
         self.history.append({"role": "assistant", "content": response})
 
         return response
 
+
 # Example Usage
 if __name__ == "__main__":
     chatbot = QwenChatbot()
 
     # First input (without /think or /no_think tags, thinking mode is enabled by default)
-    user_input_1 = "How many r's in strawberries?"
+    user_input_1 = "How many 'r's are in strawberries?"
     print(f"User: {user_input_1}")
     response_1 = chatbot.generate_response(user_input_1)
     print(f"Bot: {response_1}")
     print("----------------------")
 
     # Second input with /no_think
-    user_input_2 = "Then, how many r's in blueberries? /no_think"
+    user_input_2 = "Then, how many 'r's are in blueberries? /no_think"
     print(f"User: {user_input_2}")
     response_2 = chatbot.generate_response(user_input_2)
-    print(f"Bot: {response_2}")
+    print(f"Bot: {response_2}")
     print("----------------------")
 
     # Third input with /think
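A note on the thinking-mode switches above (not part of the diff): /think and /no_think are soft, per-turn switches. Qwen3's chat template also documents a hard switch, the enable_thinking keyword of apply_chat_template; the sketch below assumes this checkpoint's template supports it.

```python
from mlx_lm import load, generate

model, tokenizer = load("Qwen/Qwen3-235B-A22B-MLX-4bit")
messages = [{"role": "user", "content": "How many 'r's are in strawberries?"}]

# enable_thinking=False suppresses <think>...</think> blocks for the whole
# conversation, whereas /no_think only affects the current turn.
prompt = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
    enable_thinking=False,
)

response = generate(model, tokenizer, prompt=prompt, verbose=True, max_tokens=1024)
print(response)
```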
@@ -166,52 +188,61 @@ if __name__ == "__main__":
 Qwen3 excels at tool calling. We recommend using [Qwen-Agent](https://github.com/QwenLM/Qwen-Agent) to make the best use of the agentic abilities of Qwen3. Qwen-Agent internally encapsulates tool-calling templates and parsers, greatly reducing coding complexity.
 
 To define the available tools, you can use an MCP configuration file, use the tools integrated in Qwen-Agent, or integrate other tools yourself.
+
 ```python
 from qwen_agent.agents import Assistant
 
 # Define LLM
 llm_cfg = {
-    'model': 'Qwen3-235B-A22B-MLX-4bit',
+    "model": "Qwen3-235B-A22B-MLX-4bit",
 
     # Use the endpoint provided by Alibaba Model Studio:
-    # 'model_type': 'qwen_dashscope',
-    # 'api_key': os.getenv('DASHSCOPE_API_KEY'),
+    # "model_type": "qwen_dashscope",
+    # "api_key": os.getenv("DASHSCOPE_API_KEY"),
 
     # Use a custom endpoint compatible with OpenAI API:
-    'model_server': 'http://localhost:8000/v1',  # api_base
-    'api_key': 'EMPTY',
+    "model_server": "http://localhost:8000/v1",  # api_base
+    "api_key": "EMPTY",
 
     # Other parameters:
-    # 'generate_cfg': {
-    #     # Add: when the response content is `<think>this is the thought</think>this is the answer`;
-    #     # Do not add: when the response is already separated into reasoning_content and content.
-    #     'thought_in_content': True,
-    # },
+    # "generate_cfg": {
+    #     # Add: when the response content is `<think>this is the thought</think>this is the answer`;
+    #     # Do not add: when the response is already separated into reasoning_content and content.
+    #     "thought_in_content": True,
+    # },
 }
 
 # Define Tools
 tools = [
-    {'mcpServers': {  # You can specify the MCP configuration file
-        'time': {
-            'command': 'uvx',
-            'args': ['mcp-server-time', '--local-timezone=Asia/Shanghai']
+    {
+        "mcpServers": {  # You can specify the MCP configuration file
+            "time": {
+                "command": "uvx",
+                "args": ["mcp-server-time", "--local-timezone=Asia/Shanghai"],
             },
             "fetch": {
                 "command": "uvx",
-            "args": ["mcp-server-fetch"]
-        }
+                "args": ["mcp-server-fetch"],
+            },
         }
     },
-    'code_interpreter',  # Built-in tools
+    "code_interpreter",  # Built-in tools
 ]
 
 # Define Agent
 bot = Assistant(llm=llm_cfg, function_list=tools)
 
 # Streaming generation
-messages = [{'role': 'user', 'content': 'https://qwenlm.github.io/blog/ Introduce the latest developments of Qwen'}]
+messages = [
+    {
+        "role": "user",
+        "content": "https://qwenlm.github.io/blog/ Introduce the latest developments of Qwen",
+    }
+]
+
 for responses in bot.run(messages=messages):
     pass
+
 print(responses)
 ```
 
 
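An operational note on the Qwen-Agent example (not part of the diff): llm_cfg points at an OpenAI-compatible endpoint on localhost:8000, so a server must already be hosting the MLX model there, for example mlx-lm's bundled server utility. Below is a minimal sketch that probes such an endpoint with the openai client before wiring it into Qwen-Agent; the base URL, model name, and api_key simply mirror llm_cfg above.

```python
# Assumes an OpenAI-compatible server for the MLX model is already listening
# on localhost:8000; the model name and api_key mirror llm_cfg in the diff.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

completion = client.chat.completions.create(
    model="Qwen3-235B-A22B-MLX-4bit",
    messages=[{"role": "user", "content": "Reply with a single word: ready?"}],
)
print(completion.choices[0].message.content)
```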