Update app.py
app.py CHANGED
@@ -125,10 +125,32 @@ class QwenRecapAgent:
         self.retry_delay = retry_delay
         self.device = device_map
 
-
-
+        # Force use of the fast tokenizer
+        try:
+            self.tokenizer = AutoTokenizer.from_pretrained(
+                model_path,
+                token=hf_token,
+                use_fast=True,  # force the fast tokenizer
+                trust_remote_code=True
+            )
+            logging.info("Successfully loaded fast tokenizer")
+        except Exception as e:
+            logging.warning(f"Fast tokenizer failed, falling back to slow: {e}")
+            self.tokenizer = AutoTokenizer.from_pretrained(
+                model_path,
+                token=hf_token,
+                use_fast=False,
+                trust_remote_code=True
+            )
+
+        model_kwargs = {
+            "torch_dtype": torch.bfloat16,
+            "device_map": device_map if device_map == "auto" else None,
+            "trust_remote_code": True
+        }
         if hf_token:
             model_kwargs["token"] = hf_token
+
         self.model = AutoModelForCausalLM.from_pretrained(model_path, **model_kwargs)
         if device_map != "auto":
             self.model.to(device_map)
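For reference, the same load-with-fallback pattern as a standalone helper that can be tried outside the Space. The model id below is a placeholder, not one taken from this commit, and `tokenizer.is_fast` is the transformers attribute that reports which backend actually loaded:

    import logging
    from transformers import AutoTokenizer

    def load_tokenizer(model_path, hf_token=None):
        # Prefer the Rust-backed fast tokenizer; fall back to the pure-Python one.
        try:
            tok = AutoTokenizer.from_pretrained(
                model_path, token=hf_token, use_fast=True, trust_remote_code=True
            )
        except Exception as e:
            logging.warning(f"Fast tokenizer failed, falling back to slow: {e}")
            tok = AutoTokenizer.from_pretrained(
                model_path, token=hf_token, use_fast=False, trust_remote_code=True
            )
        logging.info(f"Loaded tokenizer, is_fast={tok.is_fast}")
        return tok

    tokenizer = load_tokenizer("Qwen/Qwen2.5-7B-Instruct")  # placeholder model id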
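The model_kwargs block also encodes a design choice worth noting: device_map="auto" hands layer placement to accelerate at load time, while any other value loads the weights normally and moves them afterwards with .to(). A minimal sketch of that branch, assuming the same constructor arguments as the diff:

    import torch
    from transformers import AutoModelForCausalLM

    def load_model(model_path, device_map="auto", hf_token=None):
        kwargs = {
            "torch_dtype": torch.bfloat16,
            # "auto" lets accelerate shard/place layers at load time;
            # for an explicit device, load first and move afterwards.
            "device_map": device_map if device_map == "auto" else None,
            "trust_remote_code": True,
        }
        if hf_token:
            kwargs["token"] = hf_token
        model = AutoModelForCausalLM.from_pretrained(model_path, **kwargs)
        if device_map != "auto":
            model.to(device_map)  # e.g. "cuda:0" or "cpu"
        return model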
@@ -180,7 +202,7 @@ Elaborate on each core requirement to create a rich description.
         model_inputs = self.tokenizer([text], return_tensors="pt").to(self.model.device)
 
         with torch.no_grad():
-            generated_ids = self.model.generate(**model_inputs, max_new_tokens=
+            generated_ids = self.model.generate(**model_inputs, max_new_tokens=4096, temperature=0.6)
 
         output_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist()
         full_response = self.tokenizer.decode(output_ids, skip_special_tokens=True)
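One caveat on the new generate call: transformers only honors temperature when sampling is enabled, so unless this checkpoint's generation_config already sets do_sample=True (many Qwen checkpoints ship that way), a greedy decode will ignore temperature=0.6 and emit a warning. A sketch with sampling made explicit; the helper name is ours, not from the commit:

    import torch

    def sample_response(model, tokenizer, text, max_new_tokens=4096):
        model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
        with torch.no_grad():
            generated_ids = model.generate(
                **model_inputs,
                max_new_tokens=max_new_tokens,
                do_sample=True,   # temperature only applies in sampling mode
                temperature=0.6,
            )
        # Strip the prompt tokens, keep only the newly generated continuation.
        output_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist()
        return tokenizer.decode(output_ids, skip_special_tokens=True)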