Spaces:
Running
on
Zero
Running
on
Zero
Upload app.py with huggingface_hub
Browse files
app.py
CHANGED
@@ -10,10 +10,11 @@ print("Loading model... This may take a few minutes.")
|
|
10 |
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
|
11 |
model = AutoModelForCausalLM.from_pretrained(
|
12 |
model_name_or_path,
|
13 |
-
torch_dtype=torch.bfloat16
|
14 |
device_map="auto"
|
15 |
)
|
16 |
|
|
|
17 |
def respond(message, history, system_message=None, max_tokens=None, temperature=None, top_p=None):
|
18 |
"""
|
19 |
Generate response from Hunyuan-MT model
|
|
|
10 |
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
|
11 |
model = AutoModelForCausalLM.from_pretrained(
|
12 |
model_name_or_path,
|
13 |
+
torch_dtype=torch.bfloat16,
|
14 |
device_map="auto"
|
15 |
)
|
16 |
|
17 |
+
@spaces.GPU(duration=120)
|
18 |
def respond(message, history, system_message=None, max_tokens=None, temperature=None, top_p=None):
|
19 |
"""
|
20 |
Generate response from Hunyuan-MT model
|