Ephemeral182 committed on
Commit
7f0949a
·
verified ·
1 Parent(s): 9c6f182

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -3
app.py CHANGED
@@ -125,10 +125,32 @@ class QwenRecapAgent:
125
  self.retry_delay = retry_delay
126
  self.device = device_map
127
 
128
- self.tokenizer = AutoTokenizer.from_pretrained(model_path, token=hf_token)
129
- model_kwargs = {"torch_dtype": torch.bfloat16, "device_map": device_map if device_map == "auto" else None}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
  if hf_token:
131
  model_kwargs["token"] = hf_token
 
132
  self.model = AutoModelForCausalLM.from_pretrained(model_path, **model_kwargs)
133
  if device_map != "auto":
134
  self.model.to(device_map)
@@ -180,7 +202,7 @@ Elaborate on each core requirement to create a rich description.
180
  model_inputs = self.tokenizer([text], return_tensors="pt").to(self.model.device)
181
 
182
  with torch.no_grad():
183
- generated_ids = self.model.generate(**model_inputs, max_new_tokens=1024, temperature=0.6)
184
 
185
  output_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist()
186
  full_response = self.tokenizer.decode(output_ids, skip_special_tokens=True)
 
125
  self.retry_delay = retry_delay
126
  self.device = device_map
127
 
128
+ # 强制使用 Fast Tokenizer
129
+ try:
130
+ self.tokenizer = AutoTokenizer.from_pretrained(
131
+ model_path,
132
+ token=hf_token,
133
+ use_fast=True, # 强制使用 fast tokenizer
134
+ trust_remote_code=True
135
+ )
136
+ logging.info("Successfully loaded fast tokenizer")
137
+ except Exception as e:
138
+ logging.warning(f"Fast tokenizer failed, falling back to slow: {e}")
139
+ self.tokenizer = AutoTokenizer.from_pretrained(
140
+ model_path,
141
+ token=hf_token,
142
+ use_fast=False,
143
+ trust_remote_code=True
144
+ )
145
+
146
+ model_kwargs = {
147
+ "torch_dtype": torch.bfloat16,
148
+ "device_map": device_map if device_map == "auto" else None,
149
+ "trust_remote_code": True
150
+ }
151
  if hf_token:
152
  model_kwargs["token"] = hf_token
153
+
154
  self.model = AutoModelForCausalLM.from_pretrained(model_path, **model_kwargs)
155
  if device_map != "auto":
156
  self.model.to(device_map)
 
202
  model_inputs = self.tokenizer([text], return_tensors="pt").to(self.model.device)
203
 
204
  with torch.no_grad():
205
+ generated_ids = self.model.generate(**model_inputs, max_new_tokens=4096, temperature=0.6)
206
 
207
  output_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist()
208
  full_response = self.tokenizer.decode(output_ids, skip_special_tokens=True)