rphrp1985 committed on
Commit
126e605
·
verified ·
1 Parent(s): 0080074

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -2
app.py CHANGED
@@ -45,10 +45,10 @@ def respond(
45
  # attn_implementation="flash_attention_2",
46
  # low_cpu_mem_usage=True,
47
  # llm_int8_enable_fp32_cpu_offload=True,
48
- device_map="auto"
49
  )
50
  messages = [{"role": "user", "content": "Hello, how are you?"}]
51
- input_ids = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt")
52
  ## <BOS_TOKEN><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello, how are you?<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>
53
 
54
  gen_tokens = model.generate(
 
45
  # attn_implementation="flash_attention_2",
46
  # low_cpu_mem_usage=True,
47
  # llm_int8_enable_fp32_cpu_offload=True,
48
+ device_map="cuda"
49
  )
50
  messages = [{"role": "user", "content": "Hello, how are you?"}]
51
+ input_ids = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt").to('cuda')
52
  ## <BOS_TOKEN><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello, how are you?<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>
53
 
54
  gen_tokens = model.generate(