Spaces: Running on Zero
Remove sequential CPU offload from model loading and simplify return value in safe_model_load function
model_loader.py +2 -7
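The likely motivation (not stated in the commit): diffusers' enable_sequential_cpu_offload() installs accelerate hooks that manage device placement on their own, and recent diffusers versions raise an error when an offloaded pipeline is then moved with pipe.to("cuda"), so the two calls in the original code were in conflict. A minimal sketch of the two mutually exclusive placement strategies, assuming pipe is a standard diffusers DiffusionPipeline:

# Sketch: pick ONE placement strategy for a diffusers pipeline; `pipe`
# is an assumed DiffusionPipeline instance.

# Option A: keep all weights resident on the GPU (fastest inference).
pipe.to("cuda")

# Option B: stream submodules to the GPU one at a time (lowest VRAM,
# slower). Do not combine with pipe.to("cuda"); offloading manages
# device placement itself.
# pipe.enable_sequential_cpu_offload()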
model_loader.py
CHANGED

@@ -39,7 +39,7 @@ def safe_model_load():
             print(f"XFormers not available: {e}")
 
         pipe.enable_attention_slicing()
-        pipe.enable_sequential_cpu_offload()
+        # pipe.enable_sequential_cpu_offload()
         pipe.to("cuda")
 
         # For memory-sensitive environments
@@ -47,13 +47,8 @@ def safe_model_load():
             torch.multiprocessing.set_sharing_strategy('file_system')
         except Exception as e:
             print(f"Exception raised (torch.multiprocessing): {e}")
-        # Moondream
-        model = vl(api_key=md_api_key)
 
-        return {
-            "pipeline": pipe,
-            "captioner": model
-        }
+        return pipe
 
     except Exception as e:
         print(f"Model loading failed: {e}")