# super-light-studio/models.py
import torch
from diffusers import DiffusionPipeline
import spaces
from config import MODEL_ID


def load_pipeline():
    """
    Load and configure the Open-Sora-v2 pipeline.
    """
    try:
        # Load fp16 weights from safetensors to halve memory use
        pipeline = DiffusionPipeline.from_pretrained(
            MODEL_ID,
            torch_dtype=torch.float16,
            variant="fp16",
            use_safetensors=True,
        )

        if torch.cuda.is_available():
            # Enable memory-efficient attention if xformers is installed
            try:
                pipeline.enable_xformers_memory_efficient_attention()
            except Exception:
                print("xformers not available, using default attention")

            # Offload idle submodules to the CPU. This moves each submodule
            # to the GPU only while it runs, so an explicit pipeline.to("cuda")
            # beforehand is unnecessary and would only waste transfers.
            pipeline.enable_model_cpu_offload()

        return pipeline
    except Exception as e:
        print(f"Error loading pipeline: {e}")
        raise
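
# Example usage (a minimal sketch; the prompt is a placeholder, and in the
# deployed Space the pipeline is presumably created once at startup in app.py):
#
#     pipe = load_pipeline()
#     result = pipe("a red panda eating bamboo")
#     frames = result.frames[0]  # assuming a video-pipeline output object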


@spaces.GPU(duration=1500)
def compile_transformer():
    """
    Optional: compile the transformer with ZeroGPU ahead-of-time (AoT)
    compilation for better performance. This is experimental and may not
    work with all models.
    """
    try:
        pipeline = load_pipeline()

        # Run one generation to capture example transformer inputs
        with spaces.aoti_capture(pipeline.transformer) as call:
            pipeline("test prompt generation")

        # Export the transformer graph with the captured inputs
        exported = torch.export.export(
            pipeline.transformer,
            args=call.args,
            kwargs=call.kwargs,
        )

        # Compile the exported graph and patch it back into the pipeline
        compiled_transformer = spaces.aoti_compile(exported)
        spaces.aoti_apply(compiled_transformer, pipeline.transformer)

        return pipeline
    except Exception as e:
        print(f"Compilation failed, using unoptimized model: {e}")
        return load_pipeline()
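

# Minimal local smoke test, a sketch rather than part of the deployed Space
# (on Spaces, app.py presumably calls compile_transformer() once at startup;
# outside Spaces the @spaces.GPU decorator has no effect, so this also runs
# locally given enough memory for the model).
if __name__ == "__main__":
    pipe = compile_transformer()  # falls back to the uncompiled pipeline on failure
    output = pipe("test prompt generation")  # same placeholder prompt as above
    print(type(output))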