rphrp1985 committed
Commit 0d02f18 · verified · 1 Parent(s): 4ad0753

Update app.py

Files changed (1): app.py +5 -2
app.py CHANGED
@@ -1,6 +1,7 @@
 import gradio as gr
 import spaces
 import torch
+from torch.cuda.amp import autocast
 import subprocess
 from huggingface_hub import InferenceClient
 import os
@@ -46,7 +47,7 @@ with init_empty_weights():
     attn_implementation="flash_attention_2",
     # low_cpu_mem_usage=True,
     # llm_int8_enable_fp32_cpu_offload=True,
-    device_map="cuda",
+    # device_map="cuda",
 
 )
@@ -57,7 +58,9 @@ with init_empty_weights():
 device_map = infer_auto_device_map(model, max_memory={0: "80GB", 1: "80GB", "cpu": "65GB"})
 
 # Load the model with the inferred device map
-model = load_checkpoint_and_dispatch(model, "path_to_checkpoint", device_map=device_map, no_split_module_classes=["GPTJBlock"])
+model = load_checkpoint_and_dispatch(model, "path_to_checkpoint", device_map=device_map)
+model.half()
+
 
 @spaces.GPU(duration=60)
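
Note: the loading pattern this commit moves toward — building the model on the meta device, letting accelerate infer a device map across the GPUs and CPU, dispatching the checkpoint, then casting to fp16 — looks roughly like the sketch below. The model class, config source, and "path_to_checkpoint" are placeholders assumed for illustration, not the actual contents of app.py.

import torch
from accelerate import init_empty_weights, infer_auto_device_map, load_checkpoint_and_dispatch
from transformers import AutoConfig, AutoModelForCausalLM

# Placeholder checkpoint path, mirroring the "path_to_checkpoint" string in the diff.
checkpoint = "path_to_checkpoint"

# Build the model skeleton on the meta device so no real memory is allocated yet.
config = AutoConfig.from_pretrained(checkpoint)
with init_empty_weights():
    model = AutoModelForCausalLM.from_config(config)

# Let accelerate split the layers across two 80 GB GPUs and 65 GB of CPU RAM.
device_map = infer_auto_device_map(model, max_memory={0: "80GB", 1: "80GB", "cpu": "65GB"})

# Stream the checkpoint weights onto the devices chosen above.
model = load_checkpoint_and_dispatch(model, checkpoint, device_map=device_map)

# Cast the loaded weights to fp16, as the commit does after dispatch.
model.half()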