JustinLin610 committed (verified)
Commit 5bb0d2d · Parent: a0a2181

Update README.md

Files changed (1):
  1. README.md +57 -26
README.md CHANGED
@@ -5,7 +5,7 @@ license_link: https://huggingface.co/Qwen/Qwen3-14B/blob/main/LICENSE
 pipeline_tag: text-generation
 ---
 
-# Qwen3-30B-A3B
+# Qwen3-30B-A3B-MLX-8bit
 <a href="https://chat.qwen.ai/" target="_blank" style="margin: 2px;">
     <img alt="Chat" src="https://img.shields.io/badge/%F0%9F%92%9C%EF%B8%8F%20Qwen%20Chat%20-536af5" style="display: inline-block; vertical-align: middle;"/>
 </a>
@@ -47,16 +47,29 @@ KeyError: 'qwen3'
 ```
 
 The following contains a code snippet illustrating how to use the model to generate content based on given inputs.
+
 ```python
 from mlx_lm import load, generate
+
 model, tokenizer = load("Qwen/Qwen3-30B-A3B-MLX-8bit")
-prompt = "hello, Introduce yourself, and what can you do ?"
+prompt = "Hello, please introduce yourself and tell me what you can do."
+
 if tokenizer.chat_template is not None:
     messages = [{"role": "user", "content": prompt}]
     prompt = tokenizer.apply_chat_template(
-        messages, add_generation_prompt=True
+        messages,
+        add_generation_prompt=True
     )
-response = generate(model, tokenizer, prompt=prompt, verbose=True, max_tokens=1024)
+
+response = generate(
+    model,
+    tokenizer,
+    prompt=prompt,
+    verbose=True,
+    max_tokens=1024
+)
+
+print(response)
 ```
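For interactive use you may prefer streaming output. `mlx_lm` also ships a `stream_generate` helper; the sketch below assumes a recent `mlx_lm` release (newer versions yield `GenerationResponse` chunks with a `.text` field, older ones yield plain strings, so the guard covers both):

```python
from mlx_lm import load, stream_generate

model, tokenizer = load("Qwen/Qwen3-30B-A3B-MLX-8bit")

messages = [{"role": "user", "content": "Hello, please introduce yourself."}]
prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True)

# Print each chunk as soon as it is generated instead of waiting for the full answer.
for chunk in stream_generate(model, tokenizer, prompt=prompt, max_tokens=1024):
    # Recent mlx_lm versions yield GenerationResponse objects with a .text field;
    # older versions yield plain strings.
    print(chunk.text if hasattr(chunk, "text") else chunk, end="", flush=True)
print()
```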
 
 ## Switching Between Thinking and Non-Thinking Mode
@@ -110,6 +123,8 @@ Here is an example of a multi-turn conversation:
 
 ```python
 from mlx_lm import load, generate
+
+
 class QwenChatbot:
     def __init__(self, model_name="Qwen/Qwen3-30B-A3B-MLX-8bit"):
         self.model, self.tokenizer = load(model_name)
@@ -124,29 +139,36 @@ class QwenChatbot:
             add_generation_prompt=True
         )
 
-        response = generate(self.model, self.tokenizer, prompt=text, verbose=True, max_tokens=32768)
+        response = generate(
+            self.model,
+            self.tokenizer,
+            prompt=text,
+            verbose=True,
+            max_tokens=32768
+        )
         # Update history
         self.history.append({"role": "user", "content": user_input})
         self.history.append({"role": "assistant", "content": response})
 
         return response
 
+
 # Example Usage
 if __name__ == "__main__":
     chatbot = QwenChatbot()
 
     # First input (without /think or /no_think tags, thinking mode is enabled by default)
-    user_input_1 = "How many r's in strawberries?"
+    user_input_1 = "How many 'r's are in strawberries?"
     print(f"User: {user_input_1}")
     response_1 = chatbot.generate_response(user_input_1)
     print(f"Bot: {response_1}")
     print("----------------------")
 
     # Second input with /no_think
-    user_input_2 = "Then, how many r's in blueberries? /no_think"
+    user_input_2 = "Then, how many 'r's are in blueberries? /no_think"
     print(f"User: {user_input_2}")
     response_2 = chatbot.generate_response(user_input_2)
     print(f"Bot: {response_2}")
     print("----------------------")
 
     # Third input with /think
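Beyond the per-message `/think` and `/no_think` soft switches shown above, Qwen3's chat template also accepts an `enable_thinking` argument to `apply_chat_template`, which disables thinking mode for a prompt regardless of any soft switch. A minimal sketch; `enable_thinking` is a Qwen3 chat-template argument, so confirm that your installed tokenizer's template supports it:

```python
from mlx_lm import load, generate

model, tokenizer = load("Qwen/Qwen3-30B-A3B-MLX-8bit")

messages = [{"role": "user", "content": "How many 'r's are in strawberries?"}]

# enable_thinking=False hard-disables thinking for this prompt,
# independent of /think or /no_think tags in the messages.
prompt = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    enable_thinking=False,
)

response = generate(model, tokenizer, prompt=prompt, verbose=True, max_tokens=1024)
```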
@@ -165,52 +187,61 @@ if __name__ == "__main__":
 Qwen3 excels in tool-calling capabilities. We recommend using [Qwen-Agent](https://github.com/QwenLM/Qwen-Agent) to make the best use of the agentic abilities of Qwen3. Qwen-Agent encapsulates tool-calling templates and tool-calling parsers internally, greatly reducing coding complexity.
 
 To define the available tools, you can use an MCP configuration file, use the integrated tools of Qwen-Agent, or integrate other tools yourself.
+
 ```python
 from qwen_agent.agents import Assistant
 
 # Define LLM
 llm_cfg = {
-    'model': 'Qwen3-30B-A3B-MLX-8bit',
+    "model": "Qwen3-30B-A3B-MLX-8bit",
 
     # Use the endpoint provided by Alibaba Model Studio:
-    # 'model_type': 'qwen_dashscope',
-    # 'api_key': os.getenv('DASHSCOPE_API_KEY'),
+    # "model_type": "qwen_dashscope",
+    # "api_key": os.getenv("DASHSCOPE_API_KEY"),
 
     # Use a custom endpoint compatible with OpenAI API:
-    'model_server': 'http://localhost:8000/v1',  # api_base
-    'api_key': 'EMPTY',
+    "model_server": "http://localhost:8000/v1",  # api_base
+    "api_key": "EMPTY",
 
     # Other parameters:
-    # 'generate_cfg': {
+    # "generate_cfg": {
     #     # Add: when the response content is `<think>this is the thought</think>this is the answer`;
     #     # Do not add: when the response has been separated into reasoning_content and content.
-    #     'thought_in_content': True,
+    #     "thought_in_content": True,
     # },
 }
 
 # Define Tools
 tools = [
-    {'mcpServers': {  # You can specify the MCP configuration file
-        'time': {
-            'command': 'uvx',
-            'args': ['mcp-server-time', '--local-timezone=Asia/Shanghai']
+    {
+        "mcpServers": {  # You can specify the MCP configuration file
+            "time": {
+                "command": "uvx",
+                "args": ["mcp-server-time", "--local-timezone=Asia/Shanghai"],
             },
             "fetch": {
                 "command": "uvx",
-                "args": ["mcp-server-fetch"]
-            }
+                "args": ["mcp-server-fetch"],
+            },
         }
     },
-    'code_interpreter',  # Built-in tools
+    "code_interpreter",  # Built-in tools
 ]
 
 # Define Agent
 bot = Assistant(llm=llm_cfg, function_list=tools)
 
 # Streaming generation
-messages = [{'role': 'user', 'content': 'https://qwenlm.github.io/blog/ Introduce the latest developments of Qwen'}]
+messages = [
+    {
+        "role": "user",
+        "content": "https://qwenlm.github.io/blog/ Introduce the latest developments of Qwen",
+    }
+]
+
 for responses in bot.run(messages=messages):
     pass
+
 print(responses)
 ```
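The `llm_cfg` above expects an OpenAI-compatible server already listening at `http://localhost:8000/v1` (for instance, one started with `mlx_lm`'s server module serving this model). Before wiring up the agent, you can sanity-check the endpoint with the `openai` client; a minimal sketch, assuming the server exposes the model under the same name used in `llm_cfg`:

```python
from openai import OpenAI

# Same endpoint and placeholder key as in llm_cfg above.
client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

completion = client.chat.completions.create(
    model="Qwen3-30B-A3B-MLX-8bit",  # must match the name the server exposes
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
)
print(completion.choices[0].message.content)
```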
 
 